diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml
index 1a2f5ab..402a66f 100644
--- a/.github/workflows/run_tests.yml
+++ b/.github/workflows/run_tests.yml
@@ -12,7 +12,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
- python-version: [3.6, 3.7, 3.8]
+ python-version: [3.8, 3.9]
steps:
- uses: actions/checkout@v2
@@ -23,7 +23,7 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
- pip install -r requirements.txt
+ pip install .
- name: Lint with flake8
run: |
pip install flake8
@@ -33,5 +33,5 @@ jobs:
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
- name: Test with pytest
run: |
- pip install pytest
+ pip install -r tests/requirements.txt
pytest
diff --git a/.gitignore b/.gitignore
index 1f764ff..3e30f77 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,6 +5,7 @@ laptop_env/
worker*/
*.dirlock
*.lock
+notes/
# Byte-compiled / optimized / DLL files
__pycache__/
diff --git a/README.md b/README.md
index abea51c..0ae6ef9 100644
--- a/README.md
+++ b/README.md
@@ -39,16 +39,14 @@ from sklearn.datasets import make_blobs
from incremental_trees.models.classification.streaming_rfc import StreamingRFC
# Generate some data in memory
-x, y = make_blobs(n_samples=int(2e5), random_state=0, n_features=40,
- centers=2, cluster_std=100)
+x, y = make_blobs(n_samples=int(2e5), random_state=0, n_features=40, centers=2, cluster_std=100)
srfc = StreamingRFC(n_estimators_per_chunk=3,
max_n_estimators=np.inf,
spf_n_fits=30, # Number of calls to .partial_fit()
spf_sample_prop=0.3) # Number of rows to sample each on .partial_fit()
-srfc.fit(x, y,
- sample_weight=np.ones_like(y)) # Optional, gets sampled along with the data
+srfc.fit(x, y, sample_weight=np.ones_like(y)) # Optional, gets sampled along with the data
# Should be n_estimators_per_chunk * spf_n_fits
print(len(srfc.estimators_))
@@ -96,7 +94,7 @@ For example, this can be used to feed .partial_fit() sequentially (although belo
````python
import numpy as np
from sklearn.datasets import make_blobs
-from incremental_trees.trees import StreamingRFC
+from incremental_trees.models.classification.streaming_rfc import StreamingRFC
srfc = StreamingRFC(n_estimators_per_chunk=20,
max_n_estimators=np.inf,
@@ -110,11 +108,11 @@ x, y = make_blobs(n_samples=int(2e5), random_state=0, n_features=40,
n_chunks = 30
chunk_size = int(2e3)
for i in range(n_chunks):
- sample_idx = np.random.randint(0, x.shape[0], chunk_size)
- # Call .partial_fit(), specifying expected classes, also supports other .fit args such as sample_weight
- srfc.partial_fit(x[sample_idx, :], y[sample_idx],
- classes=np.unique(y))
-
+ sample_idx = np.random.randint(0, x.shape[0], chunk_size)
+ # Call .partial_fit(), specifying expected classes, also supports other .fit args such as sample_weight
+ srfc.partial_fit(x[sample_idx, :], y[sample_idx],
+ classes=np.unique(y))
+
# Should be n_chunks * n_estimators_per_chunk
print(len(srfc.estimators_))
print(srfc.score(x, y))
@@ -126,17 +124,17 @@ There are a couple of different model setups worth considering. No idea which wo
#### "Incremental forest"
For the number of chunks/fits, sample rows from X, then fit a number of single trees (with different column subsets), eg.
````python
-srfc = StreamingRFC(n_estimators_per_chunk=10,
- max_features='sqrt')
+srfc = StreamingRFC(n_estimators_per_chunk=10, max_features='sqrt')
````
#### "Incremental decision trees"
Single (or few) decision trees per data subset, with all features.
````python
-srfc = StreamingRFC(n_estimators_per_chunk=1,
- max_features=x.shape[1])
+srfc = StreamingRFC(n_estimators_per_chunk=1, max_features=x.shape[1])
````
# Version history
+## v0.6.0
+ - Update to work with scikit-learn==1.2, dask==2022.12, dask-glm==0.2.0, dask-ml==2022.5.27. Supports Python 3.8 and 3.9.
## v0.5.1
- Add support for passing fit args/kwargs via `.fit` (specifically, `sample_weight`)
## v0.5.0
diff --git a/example_dask.py b/example_dask.py
deleted file mode 100644
index 6903f1a..0000000
--- a/example_dask.py
+++ /dev/null
@@ -1,40 +0,0 @@
-import dask as dd
-import dask_ml.cluster
-import dask_ml.datasets
-import numpy as np
-from dask.distributed import Client, LocalCluster
-from dask_ml.wrappers import Incremental
-
-from incremental_trees.trees import StreamingRFC
-
-
-def run_on_blobs():
- x, y = dask_ml.datasets.make_blobs(n_samples=1e8,
- chunks=1e5,
- random_state=0,
- centers=3)
-
- x = dd.dataframe.from_array(x)
- y = dd.dataframe.from_array(y)
-
- print(f"Rows: {x.shape[0].compute()}")
-
- ests_per_chunk = 4
- chunks = len(x.divisions)
-
- srfc = Incremental(StreamingRFC(n_estimators_per_chunk=ests_per_chunk,
- max_n_estimators=np.inf,
- verbose=1,
- n_jobs=4))
- srfc.fit(x, y,
- classes=y.unique().compute())
-
-
-# Create, connect, and run on local cluster.
-with LocalCluster(processes=False,
- n_workers=2,
- threads_per_worker=2,
- scheduler_port=8080,
- diagnostics_port=8081) as cluster, Client(cluster) as client:
- print(client)
- run_on_blobs()
diff --git a/example_fit.py b/example_fit.py
deleted file mode 100644
index bb26fbb..0000000
--- a/example_fit.py
+++ /dev/null
@@ -1,21 +0,0 @@
-import numpy as np
-from sklearn.datasets import make_blobs
-
-from incremental_trees.models.classification.streaming_rfc import StreamingRFC
-
-if __name__ == "__main__":
- # Generate some data in memory
- x, y = make_blobs(n_samples=int(2e5), random_state=0, n_features=40,
- centers=2, cluster_std=100)
-
- srfc = StreamingRFC(n_estimators_per_chunk=3,
- max_n_estimators=np.inf,
- spf_n_fits=30, # Number of calls to .partial_fit()
- spf_sample_prop=0.3) # Number of rows to sample each on .partial_fit()
-
- srfc.fit(x, y,
- sample_weight=np.ones_like(y)) # Optional
-
- # Should be n_estimators_per_chunk * spf_n_fits
- print(len(srfc.estimators_))
- print(srfc.score(x, y))
diff --git a/incremental_trees/__init__.py b/incremental_trees/__init__.py
index 93b60a1..ef7eb44 100644
--- a/incremental_trees/__init__.py
+++ b/incremental_trees/__init__.py
@@ -1 +1 @@
-__version__ = '0.5.1'
+__version__ = '0.6.0'
diff --git a/incremental_trees/models/classification/streaming_extc.py b/incremental_trees/models/classification/streaming_extc.py
index 9f82686..4ec891e 100644
--- a/incremental_trees/models/classification/streaming_extc.py
+++ b/incremental_trees/models/classification/streaming_extc.py
@@ -1,3 +1,5 @@
+from typing import Optional, Dict, Union
+
import numpy as np
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.tree import ExtraTreeClassifier
@@ -10,52 +12,54 @@ class StreamingEXTC(ClassifierAdditions, ClassifierOverloads, ExtraTreesClassifi
"""Overload sklearn.ensemble.ExtraTreesClassifier to add partial fit method and new params."""
def __init__(self,
+ criterion: str = "gini",
+ max_depth: Optional[int] = None,
+ min_samples_split: int = 2,
+ min_samples_leaf: int = 1,
+ min_weight_fraction_leaf: float = 0.0,
+ max_features: float = 1.0,
+ max_leaf_nodes: Optional[int] = None,
+ min_impurity_decrease: float = 0.0,
+ bootstrap: bool = False,
+ oob_score: bool = False,
+ n_jobs: Optional[int] = None,
+ random_state: Optional[int] = None,
+ verbose: int = 0,
+ warm_start: bool = True,
+ class_weight: Optional[Union[str, Dict]] = None,
+ ccp_alpha: float = 0.0,
+ max_samples: Optional[float] = None,
n_estimators_per_chunk: int = 1,
- n_estimators: bool = None,
- max_n_estimators=np.inf,
- criterion="gini",
- max_depth=None,
- min_samples_split=2,
- min_samples_leaf=1,
- min_weight_fraction_leaf=0.,
- max_features="auto",
- max_leaf_nodes=None,
- min_impurity_decrease=0.,
- min_impurity_split=None,
- bootstrap=False,
- oob_score=False,
- n_jobs=None,
- random_state=None,
- verbose=0,
- warm_start=True,
- class_weight=None,
+ max_n_estimators: float = np.inf,
dask_feeding: bool = True,
- spf_n_fits=100,
- spf_sample_prop: float = 0.1):
+ spf_n_fits: int = 100,
+ spf_sample_prop: float = 0.1
+ ):
super(ExtraTreesClassifier, self).__init__(
- base_estimator=ExtraTreeClassifier(),
+ estimator=ExtraTreeClassifier(),
n_estimators=n_estimators_per_chunk,
estimator_params=("criterion", "max_depth", "min_samples_split",
"min_samples_leaf", "min_weight_fraction_leaf",
"max_features", "max_leaf_nodes",
- "min_impurity_decrease", "min_impurity_split",
- "random_state"),
+ "min_impurity_decrease",
+ "random_state", "ccp_alpha"),
bootstrap=bootstrap,
oob_score=oob_score,
n_jobs=n_jobs,
random_state=random_state,
verbose=verbose,
warm_start=warm_start,
- class_weight=class_weight)
+ class_weight=class_weight,
+ max_samples=max_samples
+ )
self.max_n_estimators: int = None
self._fit_estimators: int = 0
self.classes_: np.array = None # NB: Needs to be array, not list.
self.n_classes_: int = None
-
self._fit_estimators = 0
self.max_n_estimators = max_n_estimators
- self.n_estimators_per_chunk = n_estimators
+ self.n_estimators_per_chunk = n_estimators_per_chunk
self.criterion = criterion
self.max_depth = max_depth
self.min_samples_split = min_samples_split
@@ -64,7 +68,8 @@ def __init__(self,
self.max_features = max_features
self.max_leaf_nodes = max_leaf_nodes
self.min_impurity_decrease = min_impurity_decrease
- self.min_impurity_split = min_impurity_split
+ self.ccp_alpha = ccp_alpha
+ self.max_samples = max_samples
# Set additional params.
self.set_params(n_estimators_per_chunk=n_estimators_per_chunk,
diff --git a/incremental_trees/models/classification/streaming_rfc.py b/incremental_trees/models/classification/streaming_rfc.py
index 54199b3..902cc2b 100644
--- a/incremental_trees/models/classification/streaming_rfc.py
+++ b/incremental_trees/models/classification/streaming_rfc.py
@@ -1,4 +1,4 @@
-import warnings
+from typing import Optional, Union, Dict, List
import numpy as np
from sklearn.ensemble import RandomForestClassifier
@@ -15,27 +15,28 @@ class StreamingRFC(ClassifierAdditions, ClassifierOverloads, RandomForestClassif
"""
def __init__(self,
- bootstrap=True,
- class_weight=None,
- criterion='gini',
- max_depth=None,
- max_features='auto',
- max_leaf_nodes=None,
- min_impurity_decrease=0.0,
- min_impurity_split=None,
- min_samples_leaf=1,
- min_samples_split=2,
- min_weight_fraction_leaf=0.0,
- n_estimators_per_chunk: int = 1,
- n_jobs=None,
- oob_score=False,
- random_state=None,
- verbose=0,
+ criterion: str = 'gini',
+ max_depth: Optional[int] = None,
+ min_samples_split: int = 2,
+ min_samples_leaf: int = 1,
+ min_weight_fraction_leaf: float = 0.0,
+ max_features: Optional[str] = 'sqrt',
+ max_leaf_nodes: Optional[int] = None,
+ min_impurity_decrease: float = 0.0,
+ bootstrap: bool = True,
+ oob_score: bool = False,
+ n_jobs: Optional[int] = None,
+ random_state: Optional[int] = None,
+ verbose: int = 0,
warm_start: bool = True,
+ class_weight: Optional[Union[str, Dict, List[Dict]]] = None,
+ ccp_alpha: float = 0.0,
+ max_samples: Optional[int] = None,
dask_feeding: bool = True,
- max_n_estimators=10,
- spf_n_fits=100,
- spf_sample_prop=0.1) -> None:
+ n_estimators_per_chunk: int = 1,
+ max_n_estimators: int = 10,
+ spf_n_fits: int = 100,
+ spf_sample_prop: float = 0.1) -> None:
"""
:param bootstrap:
:param class_weight:
@@ -44,7 +45,6 @@ def __init__(self,
:param max_features:
:param max_leaf_nodes:
:param min_impurity_decrease:
- :param min_impurity_split:
:param min_samples_leaf:
:param min_samples_split:
:param min_weight_fraction_leaf:
@@ -73,7 +73,6 @@ def __init__(self,
max_features=max_features,
max_leaf_nodes=max_leaf_nodes,
min_impurity_decrease=min_impurity_decrease,
- min_impurity_split=min_impurity_split,
min_samples_leaf=min_samples_leaf,
min_samples_split=min_samples_split,
min_weight_fraction_leaf=min_weight_fraction_leaf,
@@ -89,4 +88,7 @@ def __init__(self,
max_n_estimators=max_n_estimators,
verb=0,
spf_n_fits=spf_n_fits,
- spf_sample_prop=spf_sample_prop)
+ spf_sample_prop=spf_sample_prop,
+ ccp_alpha=ccp_alpha,
+ max_samples=max_samples
+ )
diff --git a/incremental_trees/models/regression/streaming_extr.py b/incremental_trees/models/regression/streaming_extr.py
index 6d565d2..3a05509 100644
--- a/incremental_trees/models/regression/streaming_extr.py
+++ b/incremental_trees/models/regression/streaming_extr.py
@@ -1,3 +1,5 @@
+from typing import Optional, Union
+
import numpy as np
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.tree import ExtraTreeRegressor
@@ -8,45 +10,46 @@
class StreamingEXTR(RegressorAdditions, RegressorOverloads, ExtraTreesRegressor):
def __init__(self,
+ criterion: str = "squared_error",
+ max_depth: Optional[int] = None,
+ min_samples_split: int = 2,
+ min_samples_leaf: int = 1,
+ min_weight_fraction_leaf: float = 0.0,
+ max_features: Union[str, float] = 1.0,
+ max_leaf_nodes: Optional[int] = None,
+ min_impurity_decrease: float = 0.0,
+ bootstrap: bool = False,
+ oob_score: bool = False,
+ n_jobs: Optional[int] = None,
+ random_state: Optional[int] = None,
+ verbose: int = 0,
+ warm_start: bool = True,
+ ccp_alpha: float = 0.0,
+ max_samples: Optional[float] = None,
+ max_n_estimators: int = np.inf,
n_estimators_per_chunk: int = 1,
- n_estimators='warn',
- max_n_estimators=np.inf,
- criterion="mse",
- max_depth=None,
- min_samples_split=2,
- min_samples_leaf=1,
- min_weight_fraction_leaf=0.,
- max_features="auto",
- max_leaf_nodes=None,
- min_impurity_decrease=0.,
- min_impurity_split=None,
- bootstrap=False,
- oob_score=False,
- n_jobs=None,
- random_state=None,
- verbose=0,
- warm_start=True,
dask_feeding: bool = True,
spf_n_fits: int = 100,
spf_sample_prop: float = 0.1):
super(ExtraTreesRegressor, self).__init__(
- base_estimator=ExtraTreeRegressor(),
+ estimator=ExtraTreeRegressor(),
n_estimators=n_estimators_per_chunk,
estimator_params=("criterion", "max_depth", "min_samples_split",
"min_samples_leaf", "min_weight_fraction_leaf",
"max_features", "max_leaf_nodes",
- "min_impurity_decrease", "min_impurity_split",
- "random_state"),
+ "min_impurity_decrease", "random_state", "ccp_alpha"),
bootstrap=bootstrap,
oob_score=oob_score,
n_jobs=n_jobs,
random_state=random_state,
verbose=verbose,
- warm_start=warm_start)
+ warm_start=warm_start,
+ max_samples=max_samples
+ )
self._fit_estimators = 0
self.max_n_estimators = max_n_estimators
- self.n_estimators_per_chunk = n_estimators
+ self.n_estimators_per_chunk = n_estimators_per_chunk
self.criterion = criterion
self.max_depth = max_depth
self.min_samples_split = min_samples_split
@@ -55,7 +58,8 @@ def __init__(self,
self.max_features = max_features
self.max_leaf_nodes = max_leaf_nodes
self.min_impurity_decrease = min_impurity_decrease
- self.min_impurity_split = min_impurity_split
+ self.ccp_alpha = ccp_alpha
+ self.max_samples = max_samples
# Set additional params.
self.set_params(n_estimators_per_chunk=n_estimators_per_chunk,
diff --git a/incremental_trees/models/regression/streaming_rfr.py b/incremental_trees/models/regression/streaming_rfr.py
index a028c93..bdc811a 100644
--- a/incremental_trees/models/regression/streaming_rfr.py
+++ b/incremental_trees/models/regression/streaming_rfr.py
@@ -1,3 +1,5 @@
+from typing import Optional
+
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
@@ -9,35 +11,32 @@ class StreamingRFR(RegressorAdditions, RegressorOverloads, RandomForestRegressor
"""Overload sklearn.ensemble.RandomForestClassifier to add partial fit method and new params."""
def __init__(self,
- n_estimators='warn',
- criterion="mse",
- max_depth=None,
- min_samples_split=2,
- min_samples_leaf=1,
- min_weight_fraction_leaf=0.,
- max_features="auto",
- max_leaf_nodes=None,
- min_impurity_decrease=0.,
- min_impurity_split=None,
- bootstrap=True,
- oob_score=False,
- n_jobs=None,
- random_state=None,
- verbose=0,
+ criterion: str = "squared_error",
+ max_depth: Optional[int] = None,
+ min_samples_split: int = 2,
+ min_samples_leaf: float = 1,
+ min_weight_fraction_leaf: float = 0.0,
+ max_features: Optional[float] = 1.0,
+ max_leaf_nodes: Optional[int] = None,
+ min_impurity_decrease: float = 0.0,
+ bootstrap: bool = True,
+ oob_score: bool = False,
+ n_jobs: Optional[int] = None,
+ random_state: Optional[int] = None,
+ verbose: int = 0,
n_estimators_per_chunk: int = 1,
warm_start: bool = True,
dask_feeding: bool = True,
- max_n_estimators=10,
- spf_n_fits=100,
- spf_sample_prop=0.1):
+ max_n_estimators: int = 10,
+ spf_n_fits: int = 100,
+ spf_sample_prop: float = 0.1):
super(RandomForestRegressor, self).__init__(
- base_estimator=DecisionTreeRegressor(),
+ estimator=DecisionTreeRegressor(),
n_estimators=n_estimators_per_chunk,
estimator_params=("criterion", "max_depth", "min_samples_split",
"min_samples_leaf", "min_weight_fraction_leaf",
"max_features", "max_leaf_nodes",
- "min_impurity_decrease", "min_impurity_split",
- "random_state"),
+ "min_impurity_decrease", "random_state"),
bootstrap=bootstrap,
oob_score=oob_score,
n_jobs=n_jobs,
@@ -47,7 +46,7 @@ def __init__(self,
self._fit_estimators = 0
self.max_n_estimators = max_n_estimators
- self.n_estimators_per_chunk = n_estimators
+ self.n_estimators_per_chunk = n_estimators_per_chunk
self.criterion = criterion
self.max_depth = max_depth
self.min_samples_split = min_samples_split
@@ -56,7 +55,6 @@ def __init__(self,
self.max_features = max_features
self.max_leaf_nodes = max_leaf_nodes
self.min_impurity_decrease = min_impurity_decrease
- self.min_impurity_split = min_impurity_split
# Set additional params.
self.set_params(n_estimators_per_chunk=n_estimators_per_chunk,
diff --git a/incremental_trees/trees.py b/incremental_trees/trees.py
deleted file mode 100644
index 4d29adf..0000000
--- a/incremental_trees/trees.py
+++ /dev/null
@@ -1,90 +0,0 @@
-import numpy as np
-
-from incremental_trees.models.classification.streaming_extc import StreamingEXTC
-from incremental_trees.models.classification.streaming_rfc import StreamingRFC
-from incremental_trees.models.regression.streaming_extr import StreamingEXTR
-from incremental_trees.models.regression.streaming_rfr import StreamingRFR
-
-
-def bunch_of_examples():
- from sklearn.datasets import make_blobs, make_regression
-
- x, y = make_regression(n_samples=int(2e5),
- random_state=0,
- n_features=40)
-
- srfr = StreamingRFR(n_estimators_per_chunk=5,
- spf_n_fits=10,
- dask_feeding=False,
- verbose=0,
- n_jobs=2)
-
- srfr.fit(x, y)
-
- # Fit 10 regressors
- for _ in range(10):
- x, y = make_regression(n_samples=int(2e5),
- random_state=0,
- n_features=40)
-
- srfr = StreamingRFR(n_estimators_per_chunk=5,
- max_n_estimators=100,
- verbose=0,
- n_jobs=5)
-
- chunk_size = int(2e3)
- for _ in range(20):
- sample_idx = np.random.randint(0, x.shape[0], chunk_size)
- srfr.partial_fit(x[sample_idx], y[sample_idx],
- classes=np.unique(y))
-
- print(f"SRFR: {srfr.score(x, y)}")
-
- sext = StreamingEXTR(n_estimators_per_chunk=5,
- max_n_estimators=100,
- verbose=0,
- n_jobs=5)
-
- for _ in range(20):
- sample_idx = np.random.randint(0, x.shape[0], chunk_size)
- sext.partial_fit(x[sample_idx], y[sample_idx],
- classes=np.unique(y))
-
- print(f"SEXTR: {sext.score(x, y)}")
-
- # Fit 10 classifiers
- for _ in range(10):
- x, y = make_blobs(n_samples=int(2e5),
- random_state=0,
- n_features=40,
- centers=2,
- cluster_std=100)
-
- srfc = StreamingRFC(n_estimators_per_chunk=5,
- max_n_estimators=100,
- verbose=0,
- n_jobs=5)
-
- chunk_size = int(2e3)
- for _ in range(20):
- sample_idx = np.random.randint(0, x.shape[0], chunk_size)
- srfc.partial_fit(x[sample_idx], y[sample_idx],
- classes=np.unique(y))
-
- print(f"SRFC: {srfc.score(x, y)}")
-
- sext = StreamingEXTC(n_estimators_per_chunk=5,
- max_n_estimators=100,
- verbose=0,
- n_jobs=5)
-
- for _ in range(20):
- sample_idx = np.random.randint(0, x.shape[0], chunk_size)
- sext.partial_fit(x[sample_idx], y[sample_idx],
- classes=np.unique(y))
-
- print(f"SEXTC: {sext.score(x, y)}")
-
-
-if __name__ == '__main__':
- bunch_of_examples()
diff --git a/notes/InconsistentClasses.ipynb b/notes/InconsistentClasses.ipynb
deleted file mode 100644
index dd59dd7..0000000
--- a/notes/InconsistentClasses.ipynb
+++ /dev/null
@@ -1,240 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Change dir to repo root if running from repo (rather than pip installed)\n",
- "# (Assuming running from [repo]/notes/)\n",
- "import os\n",
- "os.chdir('../')\n",
- "\n",
- "%load_ext autoreload\n",
- "%autoreload 2"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {},
- "outputs": [],
- "source": [
- "import pandas as pd"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 7,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "
\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " a | \n",
- " b | \n",
- " c | \n",
- " target | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " 1 | \n",
- " 1 | \n",
- " 1 | \n",
- " 1 | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 2 | \n",
- " 2 | \n",
- " 2 | \n",
- " 1 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 1 | \n",
- " 1 | \n",
- " 1 | \n",
- " 1 | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 2 | \n",
- " 2 | \n",
- " 2 | \n",
- " 1 | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " 3 | \n",
- " 3 | \n",
- " 3 | \n",
- " 2 | \n",
- "
\n",
- " \n",
- " 5 | \n",
- " 4 | \n",
- " 4 | \n",
- " 4 | \n",
- " 2 | \n",
- "
\n",
- " \n",
- " 6 | \n",
- " 3 | \n",
- " 3 | \n",
- " 3 | \n",
- " 2 | \n",
- "
\n",
- " \n",
- " 7 | \n",
- " 4 | \n",
- " 4 | \n",
- " 4 | \n",
- " 2 | \n",
- "
\n",
- " \n",
- " 8 | \n",
- " 5 | \n",
- " 5 | \n",
- " 5 | \n",
- " 3 | \n",
- "
\n",
- " \n",
- " 9 | \n",
- " 5 | \n",
- " 5 | \n",
- " 5 | \n",
- " 3 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " a b c target\n",
- "0 1 1 1 1\n",
- "1 2 2 2 1\n",
- "2 1 1 1 1\n",
- "3 2 2 2 1\n",
- "4 3 3 3 2\n",
- "5 4 4 4 2\n",
- "6 3 3 3 2\n",
- "7 4 4 4 2\n",
- "8 5 5 5 3\n",
- "9 5 5 5 3"
- ]
- },
- "execution_count": 7,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "data= pd.DataFrame({'a': (1, 2, 3, 4, 5), \n",
- " 'b': (1, 2, 3, 4, 5),\n",
- " 'c': (1, 2, 3, 4, 5),\n",
- " 'target': (1, 1, 2, 2, 3)})\n",
- "\n",
- "data = pd.concat((data, data), \n",
- " axis=0).sort_values('target').reset_index(drop=True)\n",
- "\n",
- "x = data[[c for c in data if c != 'target']]\n",
- "y = data['target']\n",
- "\n",
- "data"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 8,
- "metadata": {},
- "outputs": [],
- "source": [
- "from incremental_trees.trees import StreamingRFC\n",
- "\n",
- "srfc = StreamingRFC()\n",
- "srfc.partial_fit(x[0:7], y[0:7], # No 3s\n",
- " classes=y.unique())"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 9,
- "metadata": {},
- "outputs": [],
- "source": [
- "srfc.partial_fit(x, y)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 10,
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/mnt/s/OneDrive/Matlab/dask tests/IncrementalTrees/incremental_trees/trees.py:196: RuntimeWarning: invalid value encountered in true_divide\n",
- " norm_prob = preds / counts\n"
- ]
- },
- {
- "data": {
- "text/plain": [
- "array([1, 1, 1, 1, 2, 2, 2, 2, 3, 3])"
- ]
- },
- "execution_count": 10,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "srfc.predict(x)"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.6.7"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/notes/PerformanceComparisons.ipynb b/notes/PerformanceComparisons.ipynb
deleted file mode 100644
index 2c61669..0000000
--- a/notes/PerformanceComparisons.ipynb
+++ /dev/null
@@ -1,860 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Performamce comparison\n",
- "\n",
- "In memory, no dask."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Change dir to repo root if running from repo (rather than pip installed)\n",
- "# (Assuming running from [repo]/notes/)\n",
- "import os\n",
- "os.chdir('../')\n",
- "\n",
- "%load_ext autoreload\n",
- "%autoreload 2"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {},
- "outputs": [],
- "source": [
- "import matplotlib.pyplot as plt\n",
- "import numpy as np\n",
- "\n",
- "from typing import Tuple\n",
- "\n",
- "from incremental_trees.trees import StreamingRFC\n",
- "\n",
- "from sklearn.ensemble import RandomForestClassifier\n",
- "from sklearn.datasets import make_blobs\n",
- "from sklearn.model_selection import train_test_split\n",
- "from sklearn.metrics import roc_auc_score"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Settings\n",
- "MAX_ESTIMATORS = 120 # Lower to run faster"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Synthetic data\n",
- "\n",
- "20000 samples, 2 classes, 40 features."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {},
- "outputs": [],
- "source": [
- "x, y = make_blobs(n_samples=20000,\n",
- " centers=2,\n",
- " cluster_std=100,\n",
- " n_features=40,\n",
- " random_state=0)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Default params"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Standard random forest"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 5,
- "metadata": {},
- "outputs": [],
- "source": [
- "def score(mod, \n",
- " train: Tuple[np.array, np.array],\n",
- " test: Tuple[np.array, np.array],\n",
- " pr=False) -> Tuple[float, float]:\n",
- " \"\"\"\n",
- " Return ROC auc on x_train and x_test (from caller) on mod. Print if requested.\n",
- " \"\"\"\n",
- " y_pred_train_proba = mod.predict_proba(train[0])[:, 1]\n",
- " y_pred_test_proba = mod.predict_proba(test[0])[:, 1]\n",
- "\n",
- " roc_train = roc_auc_score(train[1], y_pred_train_proba)\n",
- " roc_test = roc_auc_score(test[1], y_pred_test_proba)\n",
- " if pr:\n",
- " print(f\"n_ests: {len(rfc.estimators_)}\")\n",
- " print(f'Train AUC: {roc_train}')\n",
- " print(f'Test AUC: {roc_test}')\n",
- " \n",
- " return roc_train, roc_test\n",
- "\n",
- "\n",
- "def inc_fit(x: np.array, y: np.array,\n",
- " rfc=None,\n",
- " steps=np.arange(1, 101, 2),\n",
- " sample: int=1):\n",
- " \"\"\"\n",
- " Fit a random forest model with an increasing number of estimators.\n",
- " \n",
- " Uses .fit with warm_start=True.\n",
- " \n",
- " :param rfc: RFC model to test. Default = None (use example with default RFC params).\n",
- " If model is supplied, the .n_estimators param will be ignored and managed here.\n",
- " :param steps: Range to iterate over. Sets total number of estimators that will be fit in model\n",
- " after each iteration. Should be range with constant step size.\n",
- " :param sample: Proportion of randomly sampled training data to use on each partial_fit call.\n",
- " If sample = 1, all training data is used on each interation,\n",
- " so should behave as standard random forest. Default = 1 (100%).\n",
- " \"\"\"\n",
- " \n",
- " x_train, x_test, y_train, y_test = train_test_split(x, y, \n",
- " test_size=0.25,\n",
- " random_state=1)\n",
- " \n",
- " if rfc is None:\n",
- " rfc = RandomForestClassifier(warm_start=True)\n",
- " \n",
- " train_scores = []\n",
- " test_scores = []\n",
- " for s in steps:\n",
- " # Fit model with these n ests\n",
- " rfc.set_params(n_estimators=s)\n",
- " rfc.fit(x_train, y_train)\n",
- " \n",
- " tr_score, te_score = score(rfc, \n",
- " train=(x_train, y_train),\n",
- " test=(x_test, y_test),\n",
- " pr=False)\n",
- " train_scores.append(tr_score)\n",
- " test_scores.append(te_score)\n",
- " \n",
- " return train_scores, test_scores\n",
- "\n",
- "\n",
- "def plot_auc(steps, train_scores, test_scores):\n",
- " \"\"\"\n",
- " Plot the train and test auc scores vs total number of model estimators\n",
- " \"\"\"\n",
- " \n",
- " fig = plt.figure(figsize=(4, 4))\n",
- " plt.plot(steps, train_scores)\n",
- " plt.plot(steps, test_scores)\n",
- " plt.xlabel('n_estimators')\n",
- " plt.ylabel('auc')\n",
- " plt.legend(['train', 'test'])"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 6,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "CPU times: user 18.2 s, sys: 219 ms, total: 18.4 s\n",
- "Wall time: 18.5 s\n",
- "With 119: 1.0 | 0.6327917799469568\n"
- ]
- },
- {
- "data": {
- "image/png": "iVBORw0KGgoAAAANSUhEUgAAARUAAAELCAYAAAD3MhIJAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAHqJJREFUeJzt3XucVXW9//HXZ+5chusMCAwIKSmECYoK6a80M8EMM8tAefyOp5L6lWV1jiW/ytRzfr/jOfbzZL+UIg95unhLK0k5iilmmbfBK1cZvMRAwDBylRmYy+f88V0jm2GQAdZam1nzfj4e+8Fea6+912cWs9+zvt+11neZuyMiEpeCfBcgItmiUBGRWClURCRWChURiZVCRURipVARkVgpVEQkVgoVEYmVQkVEYlWU7wIOVkVFhY8cOTLfZYh0O4sXL97k7pUHWq7LhcrIkSOprq7Odxki3Y6ZvdmZ5dT8EZFYKVREJFYKFRGJlUJFRGKlUBGRWCUWKmY2z8w2mtmS/bxuZvZDM6sxs5fN7KSkahGR9CS5p3I7MOVdXp8KjI4es4A5CdYiIilJ7DwVd3/CzEa+yyIXAD/3MJ7l02bWz8yGuPvfkqqpM1pbnc07d7N+WyP1O3azq7mVppa2h9Pa6rS40xoNw+kO7zogp4brlCNcz5IiLjq5KrbPy+fJb8OANTnTtdG8fULFzGYR9mYYMWJE7IW8vultFrzyNx5asp4V67fR1KIgkO5jWL8emQmVTnP3ucBcgIkTJ8b2ja/ZuJ2v3f0iS9ZuA2DCiH589oxRHNWnjKP6lFFRXkpZUSHFRUZxYQHFBQUUFEBhgVFghrV9kEHO1D5s/y+J5F1BzL+g+QyVtcDwnOmqaF4qVqzfxqU/fQYz47vnj2XKuKMY1q9HWqsXyax8hsp84Aozuws4DdiaVn/K0nVbmXnbM5QUFXDH5ZM4prJ3GqsV6RYSCxUzuxM4E6gws1rge0AxgLv/GFgAnAfUADuBv0+qllxL1m7l0tueoVdJIXdcPomRFb3SWK1It5Hk0Z8ZB3jdgS8ntf79+beHV1JSVMDdX5jM8AE90169SOZ1qzNqdzW38Ozr9Zz//iEKFJGEdKtQef7NLTQ2tXL6MRX5LkUks7pVqDxZs4nCAuO09wzIdykimdW9QmX1Jk6s6kt5WXG+SxHJrG4TKtsam3hpzRbOOFZNH5EkdZtQeXp1Pa0OpytURBLVbULlL6vr6VFcyIQR/fNdikimdZtQ+XPNJk4dNYCSom7zI4vkRbf4hm3Y1kjNxh2cfuzAfJciknndIlSerNkEqD9FJA3dJFTqGdCrhDFH9cl3KSKZl/lQcXeerNnE5GMGUlCggU1Ekpb5UKnbvov12xo55Wgd9RFJQ+ZDZWtDEwADe5fmuRKR7iHzobKtMYRKnx46NV8kDdkPlYZmAPqUdYnheEW6vOyHivZURFLVDUKlbU9FoSKShuyHStRRW67mj0gqsh8qjU2UFBVQVlyY71JEuoXsh0pDs5o+IinKfKhsb2yiTw81fUTSkvlQ2dbYrOEjRVKU/VBpaNI5KiIpyn6oNDbpHBWRFGU/VNRRK5KqzIeKOmpF0pXpUGlsamFXc6v2VERSlOlQ2d6oiwlF0pbpUNHFhCLpy3aoRNf9qPkjkp5Mh8o7zR911IqkJtFQMbMpZrbSzGrM7OoOXj/azB41s5fN7HEzq4pz/W3NH51RK5KexELFzAqBW4CpwFhghpmNbbfY94Gfu/v7geuBf4mzhj2jvilURNKS5J7KqUCNu7/m7ruBu4AL2i0zFngser6og9cPy56OWjV/RNKSZKgMA9bkTNdG83K9BHwyen4hUG5msd2bdHtjE0UFRg+NpSKSmnx31P4j8CEzewH4ELAWaGm/kJnNMrNqM6uuq6vr9Idva2imvKwIM91ETCQtSYbKWmB4znRVNO8d7r7O3T/p7hOAb0fztrT/
IHef6+4T3X1iZWVlpwvQxYQi6UsyVJ4DRpvZKDMrAaYD83MXMLMKM2urYTYwL84CwrAHChWRNCUWKu7eDFwBPAwsB+5x96Vmdr2ZTYsWOxNYaWavAoOB/xNnDdsam9VJK5KyRL9x7r4AWNBu3jU5z+8F7k1q/dsbmxhU3jupjxeRDuS7ozZRbR21IpKebIdKo/pURNKW2VBpamll5+4WHf0RSVlmQ2WHxlIRyYvMhoouJhTJj+yGStvFhGr+iKQqu6HSdjGhmj8iqcpuqDRoKEmRfMhsqOwZ9U2hIpKmzIbKno5aNX9E0pTdUGlowgx6lyhURNKU3VBpbKa8tIiCAo2lIpKmDIeKxlIRyYfshopuzC6SF9kNlcYmddKK5EF2Q6VBzR+RfMhsqGxvVPNHJB8yGyqho1bNH5G0ZTJUWludHbuadYWySB5kMlS272rGXRcTiuRDJkNFFxOK5E8mQ+WdiwnV/BFJXSZDRTdmF8mfbIZKW/NHeyoiqctmqKj5I5I32QyVBjV/RPIlk6HS1lHbu1ShIpK2TIZKUaExYkBPigoz+eOJHNEy+af8y2cdy5fPOjbfZYh0S/pTLiKxUqiISKwUKiISq0RDxcymmNlKM6sxs6s7eH2EmS0ysxfM7GUzOy/JekQkeYmFipkVArcAU4GxwAwzG9tuse8A97j7BGA6cGtS9YhIOpLcUzkVqHH319x9N3AXcEG7ZRzoEz3vC6xLsB4RSUGSh5SHAWtypmuB09otcy2w0My+AvQCPpJgPSKSgnx31M4Abnf3KuA84Bdmtk9NZjbLzKrNrLquri71IkWk85IMlbXA8Jzpqmhers8B9wC4+1NAGVDR/oPcfa67T3T3iZWVlQmVKyJxSDJUngNGm9koMyshdMTOb7fMX4GzAcxsDCFUtCsi0oUlFiru3gxcATwMLCcc5VlqZteb2bRosX8ALjezl4A7gcvc3ZOqSUSSl+i1P+6+AFjQbt41Oc+XAacnWYOIpCvfHbUikjEKFRGJlUJFRGKlUBGRWHUqVMxskpmV50z3MbP2Z8eKiHR6T2UOsCNnekc0T0RkL50NFcs9f8TdW8noUJQicng6GyqvmdlXzaw4elwJvJZkYSLSNXU2VL4IfIBw7U7b1cazkipKRLquTjVh3H0j4dodEZF31alQMbOfEQZU2ou7fzb2ikSkS+tsZ+sDOc/LgAvRKG0i0oHONn/uy502szuBPydSkYh0aYd6Ru1oYFCchYhINnS2T2U7e/pUHNgAfDOpokSk6+ps86fczAYQ9lDK2mYnVpWIdFmd3VP5PHAlYZzZF4FJwFPAh5MrTUS6os72qVwJnAK86e5nAROALYlVJSJdVmdDpdHdGwHMrNTdVwDHJVeWiHRVnT1PpdbM+gG/Ax4xs83Am8mVJSJdVWc7ai+Mnl5rZosItyh9KLGqRKTLOujhC9z9j0kUIiLZoOEkRSRWChURiZVCRURipVARkVgpVEQkVgoVEYmVQkVEYqVQEZFYKVREJFYKFRGJlUJFRGKVaKiY2RQzW2lmNWZ2dQev/7uZvRg9XjUzjdEi0sUldj9kMysEbgHOIdzV8Dkzm+/uy9qWcfev5yz/FcLgTyLShSW5p3IqUOPur7n7buAu4IJ3WX4GcGeC9YhICpIMlWHAmpzp2mjePszsaGAU8Nh+Xp9lZtVmVl1XVxd7oSISnyOlo3Y6cK+7t3T0orvPdfeJ7j6xsrIy5dJE5GAkGSprgeE501XRvI5MR00fkUxIMlSeA0ab2SgzKyEEx/z2C5nZ8UB/wi0/RKSLSyxU3L0ZuAJ4GFgO3OPuS83sejOblrPodOAud9fNyUQyILFDygDuvgBY0G7eNe2mr02yBhFJ15HSUSsiGaFQEZFYKVREJFYKFRGJlUJFRGKlUBGRWClURCRWChURiZVCRURipVARkVgpVEQkVgoVEYmVQkVEYqVQEZFYKVREJFYKFRGJlUJFRGKlUBGRWClURCRWChURiZVC
RURileho+iKSkKZGaNgMu7bBru3hsfttaG6E5l3h39YWaG0OD4CSnlDSG4p7QONW2LER3q6DwmL46D/HVppCRSRJ7tDUEL7ETTuhsASKSsO/xT2hqGTPcru2wbZ1sG1t+MLv2AA76uDtjfD2pvDYWR/CpLkhnvpKymHQ8fF8VkShIhKHxq2w+Q146zVYvwQ2LIENS2H7emht2v/7CoqguBd4C+zese/rJb2hVwX0rIC+VTDkROjZH8r6QY9+4d/S8vAo6QVFPUJotQWXFYR1eGsIt907wr9lfaBXZdhriZlCRWR/WluhZRe07A57CJvfgLdehy1v7r0nsXUNNG7Z8z4rhMrjYMTkEARlfcOjuEf4rOZd0aMBdu8MezAY9BkKfYdB+VAoHwy9BkFp7/h+nrI+wOD4Pm8/FCpy5HMPX9p3mgCboGELlA+BAaOg34jQLwChH6F5F5iFv9JWEP5KtzSFvoWd9bDmWVjzNNRWR02S0j3NkMZtoRnSuG3/exgFxVB+VPhL37cKhp8C/UdB/6PDvxXvheKydLbNEUihIuloaQ6dgtv/Fv7dtT3qZNwRvvQFReHRsjv0KWytDXsAO6L+hHdrQlhhaCY0N4T3d0ZpX6iaCD36h07Nlt0hvAaODnsVpeWhz6OwODQlSvuEAOs/KoRZgQ6c7o9CRQ7O7p2waSVsWAb1q8IXsO/waDe/H+B7OifXPR/2BmqfDU0Hb+3cOkrKod9w6DMMjjoh7BH0qgz9Cr2iR1nf0F9RvxreWh3CqbhHu87P1tCEMQvhUFAcmhPDTobKMQqGhChU5MA2rYIVD8DyB2DtYsDDfCsMHYzvpvdgqDoFxl0U/sKXD4Heg8Jf/tLy8CW3wj2HPguKorZ/J/QfCSMmHc5PJglQqEjHmnfBy/fA03Ng49Iwb+gE+OBVcNQ4GPS+0BxoagjNlS1rYNdWwKI9gxIYPC70d5jl9UeRdClUZG/b1sFLd8IzPwlHNwafAFNvhOPPC02c9kp7hyMdlcelX6sckRQqWbLzrRAKEPYOWlvC4c/61VBfE450lPQKfRalvaNzHfqH/okNS2DlAlj3Qnj/MR+GC38C7zlTexpyUBINFTObAtwMFAK3ufsNHSxzMXAtoaH+krtfkmRNmbJ9Ayz9Dax5JoTB5jf2v2yvQaEPY/eOcDr3PidaWej7OPt7cPz5UPneJCuXDEssVMysELgFOAeoBZ4zs/nuvixnmdHAbOB0d99sZoOSqqdLaTsvo6zfvnsJTQ2w+jF44Zfw6sOho7TvCBg2AU6+DAa8Z89nWEE4ijLgmH07P1tbwlmgDZvDOR/9hocOVJHDlOSeyqlAjbu/BmBmdwEXAMtylrkcuMXdNwO4+8YE6+kaXn8CHvvnsPfR+6hw+HPYhHCuxppnYf0r4ZyNXoPgA1fA+JmHtldRUAg9B4SHSIySDJVhwJqc6VrgtHbLvBfAzJ4kNJGudfeHEqzpyOMejp6sXwJP3wqv/zEcdv3gN0N/SG01rHwwXNMx7CSY/GUYeUbo62g7i1TkCJLvjtoiYDRwJlAFPGFmJ7j7ltyFzGwWMAtgxIgRadeYjDefgj9cGy462709zOtZAef+X5j42b0v9GrcuufsTpEjXJKhshYYnjNdFc3LVQs84+5NwOtm9iohZJ7LXcjd5wJzASZOnOiJVZyG1lb4y83w6D+Fi8fGz4DK48Nj6PhwdKa9sr7p1ylyiJIMleeA0WY2ihAm04H2R3Z+B8wAfmZmFYTm0GsJ1pRfb9fD774IqxbC2E/AtP/f+bNHRbqIxELF3ZvN7ArgYUJ/yTx3X2pm1wPV7j4/eu2jZrYMaAGucvf6pGrKC3eofQ4W3w5LfhOO1pz3fTjl8zr/QzLJ3LtWa2LixIleXV2d7zIOzD1cL/P4DeHEspLecMKn4LQvwqAx+a5ODkFTUxO1tbU0Njbmu5RElZWVUVVVRXHx3n14ZrbY3Sce6P357qjNhp1vhbNVS8tDePw16oSt
fS5cSv/xm8MFdaXl+a5UDkNtbS3l5eWMHDkSy+heprtTX19PbW0to0aNOqTPUKgcriX3wW//VxghLFf50NBncuIlUKjNnAWNjY2ZDhQAM2PgwIHU1dUd8mfot/1QucOfb4JHrw/DBp44I5z6vms79BwIE2YmMv6n5FeWA6XN4f6MCpVD0dIED3wdXvgFjPsUXHBLtx4+UNKxZcsW7rjjDr70pS8d1PvOO+887rjjDvr165dQZXvT0FcHq7UF7vt8CJQPXgUX3aZAkVRs2bKFW2+9dZ/5zc3N7/q+BQsWpBYooD2Vg+MOD34Dlv0u3HzpA1/Jd0XSjVx99dWsXr2a8ePHU1xcTFlZGf3792fFihW8+uqrfOITn2DNmjU0NjZy5ZVXMmvWLABGjhxJdXU1O3bsYOrUqZxxxhn85S9/YdiwYdx///306BFvM12hcjAe+6dwvskZ31CgdHPX/X4py9Zti/Uzxw7tw/c+/r79vn7DDTewZMkSXnzxRR5//HE+9rGPsWTJkneO0sybN48BAwbQ0NDAKaecwkUXXcTAgQP3+oxVq1Zx55138tOf/pSLL76Y++67j5kzZ8b6cyhUDqS1BepWhKEVn/xBGF7g7GvyXZUIp5566l6HfX/4wx/y29/+FoA1a9awatWqfUJl1KhRjB8/HoCTTz6ZN954I/a6FCrttTSHAY/eeALeeDKca7Ir+os07lPwsZt0Jqy86x5FWnr12nOd2OOPP84f/vAHnnrqKXr27MmZZ57Z4Ul6paWl7zwvLCykoSGm26fmUKi02b0THv7f8Mqv94yKVjkmnLQ2YhIMPy2M3q5AkTwpLy9n+/btHb62detW+vfvT8+ePVmxYgVPP/10ytXtoVCBMAzj3TPDmCYTLoVjPwJHnwG9K/Ndmcg7Bg4cyOmnn864cePo0aMHgwfvuYXplClT+PGPf8yYMWM47rjjmDQpf7cu0bU/NY/CfZ8LN5765G3w3o/G99mSKcuXL2fMmO5x3VZHP6uu/emMl+4OQxFUjoHP/AIGHpPvikS6vO4bKkvuC4Fy9Okw465wywoROWzdM1SWzYf7Lofhk+CSuzsebU1EDkn3O01/1SNw79+HUeovvUeBIhKz7hUqLU3wwDeg4r0w816NbyKSgO7V/HnpLtj6V7jk1xpMWiQh3WdPpaUZ/vR9GDIeRp+T72pEDtr+rlLujB/84Afs3Lkz5oo61n1C5ZVfh5PcPvQtnRUrXVJXCZXu0fxpbYEnboTBJ8BxU/NdjcghyR364JxzzmHQoEHcc8897Nq1iwsvvJDrrruOt99+m4svvpja2lpaWlr47ne/y4YNG1i3bh1nnXUWFRUVLFq0KNE6u0eoLPkNvLUaLv659lIkHv91dbivdZyOOgGm3rDfl3OHPli4cCH33nsvzz77LO7OtGnTeOKJJ6irq2Po0KE8+OCDQLgmqG/fvtx0000sWrSIioqKeGvuQPabPy1NYS+lcgwc//F8VyMSi4ULF7Jw4UImTJjASSedxIoVK1i1ahUnnHACjzzyCN/61rf405/+RN++6R+QyPaeSmsr3H8FbFoJn/kVFGQ/QyUl77JHkQZ3Z/bs2XzhC1/Y57Xnn3+eBQsW8J3vfIezzz6ba65Jd/yfbH/LHr0WXr4Lzvo2jDk/39WIHJbcoQ/OPfdc5s2bx44dYZiOtWvXsnHjRtatW0fPnj2ZOXMmV111Fc8///w+701advdUnroVnrwZJn4uDFAt0sXlDn0wdepULrnkEiZPngxA7969+eUvf0lNTQ1XXXUVBQUFFBcXM2fOHABmzZrFlClTGDp0aOIdtdkc+uCVe8NwBmM+Dp/+TygoTKc4yTQNfdC5oQ+y2fxp2Q0j/0cYH0WBIpKqbDZ/xl8C75+ujlmRPMjut06BIpIX+uaJHISu1gd5KA73Z1SoiHRSWVkZ9fX1mQ4Wd6e+vp6yskO/lW+ifSpmNgW4GSgEbnP3G9q9fhlwI7A2mvUjd78tyZpE
DlVVVRW1tbXU1dXlu5RElZWVUVVVdcjvTyxUzKwQuAU4B6gFnjOz+e6+rN2id7v7FUnVIRKX4uLive4IKB1LsvlzKlDj7q+5+27gLuCCBNcnIkeAJENlGLAmZ7o2mtfeRWb2spnda2bDE6xHRFKQ747a3wMj3f39wCPAf3a0kJnNMrNqM6vOentWpKtLsqN2LZC751HFng5ZANy9PmfyNuDfOvogd58LzAUwszoze/MA664ANh1swQlQHfs6UmpRHfs6UC1Hd+ZDkgyV54DRZjaKECbTgUtyFzCzIe7+t2hyGrD8QB/q7ge8wbGZVXfmGoWkqY59HSm1qI59xVVLYqHi7s1mdgXwMOGQ8jx3X2pm1wPV7j4f+KqZTQOagbeAy5KqR0TSkeh5Ku6+AFjQbt41Oc9nA7OTrEFE0pXvjtqkzM13ARHVsa8jpRbVsa9Yauly46mIyJEtq3sqIpInmQoVM5tiZivNrMbMrk5xvcPNbJGZLTOzpWZ2ZTR/gJk9Ymaron/7p1RPoZm9YGYPRNOjzOyZaLvcbWYlKdXRLzqpcYWZLTezyfnYJmb29ej/ZYmZ3WlmZWltEzObZ2YbzWxJzrwOt4EFP4xqetnMTkq4jhuj/5uXzey3ZtYv57XZUR0rzezcg1lXZkIl51qjqcBYYIaZjU1p9c3AP7j7WGAS8OVo3VcDj7r7aODRaDoNV7L34fl/Bf7d3Y8FNgOfS6mOm4GH3P144MSoplS3iZkNA74KTHT3cYQjkdNJb5vcDkxpN29/22AqMDp6zALmJFzHI8C46OTTV4kOmkS/u9OB90XvuTX6fnWOu2fiAUwGHs6Zng3MzlMt9xMupFwJDInmDQFWprDuKsIv6oeBBwAjnNBU1NF2SrCOvsDrRP12OfNT3SbsuVxkAOFo5wPAuWluE2AksORA2wD4CTCjo+WSqKPdaxcCv4qe7/XdIZwWMrmz68nMngqdv9YoUWY2EpgAPAMM9j0n960HBqdQwg+AbwKt0fRAYIu7N0fTaW2XUUAd8LOoKXabmfUi5W3i7muB7wN/Bf4GbAUWk59t0mZ/2yCfv8OfBf4rjjqyFCp5Z2a9gfuAr7n7ttzXPER+oofazOx8YKO7L05yPZ1UBJwEzHH3CcDbtGvqpLRN+hOujh8FDAV6sW8zIG/S2AYHYmbfJjThfxXH52UpVA54rVGSzKyYECi/cvffRLM3mNmQ6PUhwMaEyzgdmGZmbxCGmvgwoV+jn5m1neiY1napBWrd/Zlo+l5CyKS9TT4CvO7ude7eBPyGsJ3ysU3a7G8bpP47HA2Udj5waRRwh11HlkLlnWuNop786cD8NFZsZgb8B7Dc3W/KeWk+8HfR878j9LUkxt1nu3uVu48k/PyPufulwCLgU2nVEdWyHlhjZsdFs84GlpHyNiE0eyaZWc/o/6mtjtS3SY79bYP5wP+MjgJNArbmNJNiZ2Fkxm8C09x9Z7v6pptZaXTt3mjg2U5/cJKdZGk/gPMIvdirgW+nuN4zCLuwLwMvRo/zCP0ZjwKrgD8AA1Ks6Uzggej5e6Jfihrg10BpSjWMB6qj7fI7oH8+tglwHbACWAL8AihNa5sAdxL6cpoIe2+f2982IHSq3xL9/r5COGKVZB01hL6Ttt/ZH+cs/+2ojpXA1INZl86oFZFYZan5IyJHAIWKiMRKoSIisVKoiEisFCoiEiuFiojESqEisTCz8WZ2Xs70tLiGnzCzr5lZzzg+S5Kn81QkFtHp3hM9gVvYRpcdTHT3Tt/KwswK3b0l7lrkwLSn0s2Y2chowKSfRgMXLTSzHvtZ9hgze8jMFpvZn8zs+Gj+p6MBj14ysyeiyyKuBz5jZi+a2WfM7DIz+1G0/O1mNsfMnjaz18zszGjQoOVmdnvO+uZYuGncUjO7Lpr3VcKFgIvMbFE0b4aZvRLV8K85799hZv/PzF4CJpvZDRYGznrZzL6fzBaVfaRxurYeR86DMKZGMzA+mr4HmLmfZR8F
RkfPTyNcSwThFPJh0fN+0b+XAT/Kee8704QBgu4inIZ+AbANOIHwR21xTi1tp6sXAo8D74+m3wAqoudDCdfzVBKuhH4M+ET0mgMXR88HEk4xt9w69Uj+oT2V7ul1d38xer6YEDR7iYZx+ADwazN7kTCA0JDo5SeB283sckIAdMbvPXy7XwE2uPsr7t4KLM1Z/8Vm9jzwAmHUsY5G7jsFeNzDVcdtl+t/MHqthXClOIRxUxqB/zCzTwI79/kkSUSi9/2RI9aunOctQEfNnwLCQEbj27/g7l80s9OAjwGLzezkg1hna7v1twJF0dWw/wic4u6bo2ZRWSc+N1ejR/0oHm5mdyrhquRPAVcQhoKQhGlPRTrkYZCp183s0/DOoMwnRs+PcfdnPNwYro4w9sZ2oPwwVtmHMJDTVjMbTBivtU3uZz8LfMjMKqJxU2cAf2z/YdGeVl8PN7T7OmGMXEmB9lTk3VwKzDGz7wDFhH6Rl4AbzWw0oY/k0WjeX4Gro6bSvxzsitz9JTN7gTBEwRpCE6vNXOAhM1vn7mdFh6oXRet/0N07GgulHLjfzMqi5b5xsDXJodEhZRGJlZo/IhIrNX8EM7uFMG5rrpvd/Wf5qEe6NjV/RCRWav6ISKwUKiISK4WKiMRKoSIisVKoiEis/huBrWiEKBXXogAAAABJRU5ErkJggg==\n",
- "text/plain": [
- ""
- ]
- },
- "metadata": {
- "needs_background": "light"
- },
- "output_type": "display_data"
- }
- ],
- "source": [
- "steps = np.arange(1, MAX_ESTIMATORS, 2)\n",
- "rfc = RandomForestClassifier(warm_start=True)\n",
- "\n",
- "%time train_scores, test_scores = inc_fit(x, y, rfc=rfc, steps=steps)\n",
- "print(f\"With {len(rfc.estimators_)}: {train_scores[-1]} | {test_scores[-1]}\")\n",
- "plot_auc(steps, train_scores, test_scores)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Streaming random forest"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 7,
- "metadata": {},
- "outputs": [],
- "source": [
- "def inc_partial_fit(x: np.ndarray, y:np.ndarray,\n",
- " srfc=None,\n",
- " steps=np.arange(1, 101, 2),\n",
- " sample: int=0.1,\n",
- " **kwargs) -> None:\n",
- " \n",
- " \"\"\"\n",
- " Fit increasing number of estimators using .partial_fit on a subsample of the training data.\n",
- " \n",
- " StreamingRFC.n_estimators: Number of estimators that will be fit in each step. Set from first\n",
- " difference in range (ie. range[1]-range[0])\n",
- " StreamingRFC.max_n_estimators: Limit on number of estimators than will be fit in model. Should >\n",
- " range[-1].\n",
- " \n",
- " :param srfc: StreamingRFC model to test. Default = None (use example with default RFC params).\n",
- " If model is supplied, the .n_estimators param should match the constant range\n",
- " step size.\n",
- " :param steps: Range to iterate over. Sets total number of estimators that will be fit in model\n",
- " after each iteration. Should be range with constant step size.\n",
- " :param sample: Proportion of randomly sampled training data to use on each partial_fit call.\n",
- " If sample = 1, all training data is used on each interation,\n",
- " so should behave as standard random forest. Default = 0.1 (10%)\n",
- " \"\"\"\n",
- " \n",
- " x_train, x_test, y_train, y_test = train_test_split(x, y, \n",
- " test_size=0.25,\n",
- " random_state=1)\n",
- " n_train = x_train.shape[0]\n",
- " \n",
- " if srfc is None:\n",
- " srfc = StreamingRFC(n_estimators_per_chunk=np.diff(steps)[0],\n",
- " max_n_estimators=np.max(steps),\n",
- " **kwargs)\n",
- " \n",
- " train_scores = []\n",
- " test_scores = []\n",
- " for s in steps:\n",
- " \n",
- " use_idx = np.arange(0, n_train)[np.random.randint(low=0, \n",
- " high=n_train, \n",
- " size=int(n_train * sample))]\n",
- " \n",
- " # Fit model with these n ests\n",
- " srfc.partial_fit(x_train[use_idx, :], y_train[use_idx],\n",
- " classes=np.unique(y))\n",
- " \n",
- " tr_score, te_score = score(srfc,\n",
- " train=(x_train, y_train),\n",
- " test=(x_test, y_test),\n",
- " pr=False)\n",
- " train_scores.append(tr_score)\n",
- " test_scores.append(te_score)\n",
- " \n",
- " return train_scores, test_scores"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "#### As normal random forest\n",
- "1 estimator per full subset"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 8,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "CPU times: user 32.2 s, sys: 78.1 ms, total: 32.3 s\n",
- "Wall time: 32.7 s\n",
- "With 119: 0.9999999999999999 | 0.6331369808306709\n"
- ]
- },
- {
- "data": {
- "image/png": "iVBORw0KGgoAAAANSUhEUgAAARUAAAELCAYAAAD3MhIJAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAIABJREFUeJzt3Xt4XWWZ9/HvnVOT9JAeUkrbUFqxHMqphXBmhpNIC1pAHATkVRy1Og6KzojCpaIyr684HkZ8RRh0gFEYEMFD0QoFLIJyTLGUlhZaWtqkBRrapoecs3PPH88K3U1TmrZrrZ3u/ftcV67uvdbKXndWs395nmet9Wxzd0RE4lKU6wJEJL8oVEQkVgoVEYmVQkVEYqVQEZFYKVREJFYKFRGJlUJFRGKlUBGRWJXkuoDdVV1d7RMnTsx1GSIFZ/78+W+5++hdbbfPhcrEiROpq6vLdRkiBcfMVvVnO3V/RCRWChURiZVCRURipVARkVgpVEQkVomFipndZmbrzGzRTtabmf3IzJab2UIzOyapWkQkPUm2VO4Apr/D+hnA5OhrFnBzgrWISEoSu07F3R83s4nvsMn5wM89zGf5tJkNN7Ox7v56UjXlirvT3tVNW2eG1s4MrR0ZOjNOZ6abzkw3Xd1OZ1c3HZluMt1Optvp9vB9Gd/2uNud7m7odscBHKJHuINH/769X3acKvSdZg9NfWJRTWU6IFSWlXDRsTWxvV4uL34bD9RnPW+Ilu0QKmY2i9CaYcKECakU118bmztY8VYza5taWdvUSv3GFt7Y1MaG5g7WN3ewYWsHzR1ddOv9IwPU+OEVeRMq/ebutwK3AtTW1ubs7enuLF67mWdWbmD+qg3MX7WRNze3b7dNVUUpY6vKGTm4jKNrhjNycBlDy0soLy2msqyY8tJiKkqLKS0uoqTYKIv+LS0uCsuKjOIiwwyKi4wiM4oMiswwM4otrAMoKgoPoqeYgbFtffa67fS5sGfVO6xMgKW7O+lDUcz/CbkMlTXAAVnPa6JlA079hhbuemY1D7ywljVNrQDUjKjgpHeN4ojxVUyqHkzNiErGDi9nWHlpjqsVya1chsps4Eozuwc4Adg00MZTVjRu5QcPv8IfXnydIjNOO3g0n3/PZE6dXM3YqopclycyICUWKmZ2N3A6UG1mDcDXgVIAd78FmAOcCywHWoCPJVXL7nJ3fvbESm54cCmDSor49GkH8ZGTDlSQiPRDkmd/Lt3Fegf+Oan976muTDfXzV7M/zyzmumH78+/XXAEo4cOynVZIvuMfWKgNi1tnRmuuudvPLT4TT5z+kF88b2HvD0YKiL9o1CJuDtX/s/zPLJkHV9//xQ+dsqkXJcksk/SvT+RO558jUeWrOO69ylQRPaGQgV4+Y0tfPuPSznr0P342CkTc12OyD6t4EMl0+1cfd8LDCsv4TsfPArT1Vgie6Xgx1Ru/+tKFjZs4seXTaN6iM7yiOytgm6pvL6ple/PfYX3HLYf5x05NtfliOSFgg6VO59eRXtXhq+//3B1e0RiUrCh0t6V4Z5n6znz0DEcMLIy1+WI5I2CDZU/vvgG65s7+MhJB+a6FJG8UrChcufTq5g4qpJT312d61JE8kpBhsr6re3UrdrIRcfU6DJ8kZgVZKg8vWIDAKdOVitFJG4FGSpPvvoWQwaVcOT4qlyXIpJ3CjJUnnp1PSdMGklJcUH++CKJKrh31eubWlnxVjMnHTQq16WI5KWCC5WnXl0PoFARSUhBhsrwylIO239YrksRyUsFFyoL6ps4ZsIInUoWSUhBhcrW9i6WN27lqBqd9RFJSkGFyuI1m3CHo2uG57oUkbxVUKGysGETgFoqIgkqqFB5oaGJ8cMrGKXJmEQSU1ChsrBhE0cfoFaKSJIKJlQ2NnewekMLR47XeIpIkgomVF5cE8ZTjtZ4ikiiCiZUlq3bCsBhY3XRm0iSCiZU6je0
MLS8hOGVpbkuRSSvFUyorN7QwoSRlZrgWiRhBRMqq9Y3M0ETXIskriBCpbvbqd/YqlARSUFBhMq6Le10dHXrozhEUpBoqJjZdDN72cyWm9k1faw/0MweNbOFZvaYmdUkUcfqDS0AaqmIpCCxUDGzYuAmYAYwBbjUzKb02ux7wM/d/SjgeuDbSdSiUBFJT5ItleOB5e6+wt07gHuA83ttMwX4U/R4Xh/rY7F6QwtFBuOGVyTx8iKSJclQGQ/UZz1viJZlewH4QPT4QmComcU+z+Pq9c2MraqgrKQghpBEcirX77IvAqeZ2d+A04A1QKb3RmY2y8zqzKyusbFxt3eyekMLB45S10ckDUmGyhrggKznNdGyt7n7Wnf/gLtPA74SLWvq/ULufqu717p77ejRo3e7kNUbdDpZJC1JhspzwGQzm2RmZcAlwOzsDcys2sx6argWuC3uIlo6unhra7tOJ4ukJLFQcfcu4ErgIWAJcK+7Lzaz681sZrTZ6cDLZvYKMAb4Vtx1NG5pB2DMsPK4X1pE+lCS5Iu7+xxgTq9l12U9vg+4L8kaNrZ0AjBCNxKKpCLXA7WJ29jSAcDwyrIcVyJSGPI+VJqiUFFLRSQdeR8qG5t7uj9qqYikIe9DpamlAzMYVqGWikga8j5UNrZ0UlVRSrE+5lQkFQUQKh3q+oikKO9DpamlU/PSiqQo70NFLRWRdOV9qKilIpKuvA8VtVRE0pXXodLelaGlI8PIwQoVkbTkdag0Rff9qPsjkp68DpWNb1+ir5aKSFryO1Sa1VIRSVteh0qTWioiqcvrUNk2l4pCRSQteR4qPXOpqPsjkpa8DpWmlg4qSospLy3OdSkiBSOvQ2VjS6cmZxJJWV6HSlNLh6aRFElZXofKhuYORgxWS0UkTXkdKlvauhhWrlARSVNeh0pLR4bKskQ/hUREesnzUOmiskxnfkTSlOehklGoiKQsb0Ml0+20d3Wr+yOSsrwNlZaOLgC1VERSlreh0tqRAaBCoSKSqrwNlZYoVAYPUqiIpClvQ6U56v5UlGpMRSRNeRsqPd0fjamIpCtvQ0XdH5HcSDRUzGy6mb1sZsvN7Jo+1k8ws3lm9jczW2hm58a17xZ1f0RyIrFQMbNi4CZgBjAFuNTMpvTa7KvAve4+DbgE+Elc+29R90ckJ5JsqRwPLHf3Fe7eAdwDnN9rGweGRY+rgLVx7fztUFH3RyRVSfYNxgP1Wc8bgBN6bfMNYK6ZfRYYDLwnrp1vu/hN3R+RNOV6oPZS4A53rwHOBX5hZjvUZGazzKzOzOoaGxv79cI9LZUKTSUpkqokQ2UNcEDW85poWbaPA/cCuPtTQDlQ3fuF3P1Wd69199rRo0f3a+etHRnKS4soLrI9qV1E9lCSofIcMNnMJplZGWEgdnavbVYDZwGY2WGEUOlfU2QXmju61PURyYHEQsXdu4ArgYeAJYSzPIvN7Hozmxlt9q/AJ83sBeBu4Ap39zj239KRUddHJAcS/VPu7nOAOb2WXZf1+CXglCT23dKuuVREciHXA7WJaenMUDlI3R+RtOVtqLR2dFGp7o9I6vI2VJrV/RHJibwNlVZ1f0Ryol+hYmYnmtnQrOfDzKz31bEDSou6PyI50d+Wys3A1qznW6NlA1ZLe0ZTSYrkQH9DxbKvH3H3bhI+Hb033J2WzozmUhHJgf6Gygoz+5yZlUZfVwErkixsb3Rkusl0u66oFcmB/obKp4GTCffu9NxtPCupovZWS7tuJhTJlX79KXf3dYR7d/YJLZ2aSlIkV/oVKmZ2O2FCpe24+z/GXlEMWnumklT3RyR1/X3X/T7rcTlwITHO0ha35qj7o1PKIunrb/fn/uznZnY38JdEKoqBppIUyZ09vaJ2MrBfnIXEqbVTU0mK5Ep/x1S2sG1MxYE3gS8lVdTeerv7o4vfRFLX3+7PUDMbSWihlPcsTqyqvaRPJxTJnf62VD4BXEWYZ3YBcCLwFHBmcqXtOc2kL5I7
/R1TuQo4Dljl7mcA04CmxKraS81qqYjkTH9Dpc3d2wDMbJC7LwUOSa6svdPakaHIYFBJ3s7sIDJg9bd/0GBmw4HfAg+b2UZgVXJl7Z3JY4ZwwdTxmOnjOUTSZrs7eb2ZnUb4iNIHo48zTVVtba3X1dWlvVuRgmdm8929dlfb7fZIprv/ec9KEpFCoEEHEYmVQkVEYqVQEZFYKVREJFYKFRGJlUJFRGKlUBGRWClURCRWChURiZVCRURipVARkVglGipmNt3MXjaz5WZ2TR/r/8PMFkRfr5jZgJ2jRUT6J7Gp0cysGLgJOJvwqYbPmdlsd3+pZxt3/0LW9p8lTP4kIvuwJFsqxwPL3X1FNEXCPcD577D9pcDdCdYjIilIMlTGA/VZzxuiZTswswOBScCfdrJ+lpnVmVldY2Nj7IWKSHwGykDtJcB97p7pa6W73+rute5eO3r06JRLE5HdkWSorAEOyHpeEy3ryyWo6yOSF5IMleeAyWY2yczKCMExu/dGZnYoMILwkR8iso9LLFTcvQu4EngIWALc6+6Lzex6M5uZteklwD2+u5PlisiAlOinbbn7HGBOr2XX9Xr+jSRrEJF0DZSBWhHJEwoVEYmVQkVEYqVQEZFYKVREJFYKFRGJlUJFRGKlUBGRWClURCRWChURiZVCRURipVARkVgpVEQkVgoVEYmVQkVEYqVQEZFYKVREJFYKFRGJlUJFRGKlUBGRWClURCRWic6mLyJ7qfkt2PomlJTDkP1g0NCwvDsDXW3Qthla1sOWN2DL62Hb7kzYdth4GDombN/aBG+9ApvXQMuGsH1zI1gRDBsLH7oztpIVKiJ7om0TrHkeXvtLeHOWVoAVAx7e1GWVMPIgKK8Cz4Q3/+a18OaisH7MFCgbEh53NkOmE0oGhW0al4bnbZth0+rt9zuoCjLtIVD2RFEJVIyAofvD4P1CvcWD9vZobEehItJbd3d401pR+Ou/4C5Y/XR4nOkIQbA5+gRfK4bKUdDVDt4dlhUVQUcLdHf2emGD6snhdZfNDWEDUFwW3uxdbTB4NOw3BcoGh5A5/pMw/IDw+lteD6FTMigEUkk5lA8LITFk/9DiGDIm1NS8DjZHLRez8HrVh4QwMUv08ClUJP91tcP6V0OLoq0J2rdC0yp4fSF0bA1h0LY5rGvZEFoO2awIxh0DY44ILRKAUe+GsVNhwgnbuiTZMl2wqR46msObuLwqhE/P92c6QzgVlUBxAm/DqprwlQMKFdn3tG2ChfdC48vhL3Fxafhrb8WhJdGxFZpWh/EIKwp/tbu7tn8NKwp/uStHAhbegPsfARUjw1//kkHgHloDU2bu/hu0uARGTnqH9aXhKw8pVCS33MO4xJIHwjjE8AOhphaqDgjdgbeWhQHGnhZG2yZYdF/4d1BVaM53d4W//J4Jb9TSwaHLMG5aeP0h+8GYw8O25cNh0JAwnlBWmeufPi8pVCQ9XR3QuhE2vApL/wANdbDxNdj6BpRUhDGI3i2KbEUlUFoJE0+F074M46amVrr0n0JF4lf/LDxzSxjHaN0QuhDtW8JpTDxsU1wG42vhoDNg0t/D4ReGsxBNq6DhudB1KSmDEZNg9CFQWQ2l5Tn9saR/FCryzjqaYcVjUP9MOOPQ1RZOpZaUw36HhQBYuwAqhocvK4bXF4RBybFTofrgcAq0bEjo0gyuhqFjQ5CUD9txfyMnvfNYhAx4ChXZ3rqlsPLP4VqJ118IX91doevR3RVCY8yUcEbllT/C6EPhqIuhsyV0bTqa4Yyvwon/FMYupOAoVApR22ZY/jCsehI2rIguwGoNZ1KaVoVtyqvCKdSTPwvvOh0mnBx9s4czIxCCpSTeC6dk35doqJjZdOBGoBj4mbvf0Mc2FwPfIHS2X3D3y5KsqSB0d4fB0LULYHNDuETbbNv1GSv+vK1LUn1wOGNSVhkGPk/+LBxyLgwbt+uLpBQo0ofEQsXMioGbgLOBBuA5M5vt7i9lbTMZuBY4
xd03mtl+SdWT1zasgKVzwtjHuiXhIq9M+7b1xWXh1GpZZbgf5Ngr4IgPhIHSJC68koKW5G/U8cByd18BYGb3AOcDL2Vt80ngJnffCODu6xKsJz9seRPeeBE2rgz3kdQ/B+sWh3Wj3h1Otw4ZHVog448N131obENSlGSojAfqs543ACf02uZgADP7K6GL9A13fzDBmvY97uFsy+Jfh2s7Nq7ctm5QVeiyvPf/wmEzYcSBuatTJJLrtm8JMBk4HagBHjezI929KXsjM5sFzAKYMGFC2jXmxoaV8OKvYOEvYf1yKCqFg86E4z4RrhQd+a5w81iRpsSRgSXJUFkDHJD1vCZalq0BeMbdO4GVZvYKIWSey97I3W8FbgWora31xCqOw+svhPGNylHwrtPChVu74h7GQ5Y8EM7IbKoP968AHHgqnHJVaIlUDE+0dJE4JBkqzwGTzWwSIUwuAXqf2fktcClwu5lVE7pDKxKsKVn1z8IvLtwWCFYE0/4PHPo+2P/IcGs6hNvii8vC+pWPwePfh1V/CWdjDjw5tEiGT4BDzwv3sIjsQxILFXfvMrMrgYcI4yW3uftiM7seqHP32dG695rZS0AGuNrd1ydVU6IaX4E7Lwo3r330mbDsyR/Dcz+F5/87PJ/4dzBoWJhLw4rCFaXNjWEOjRnfhWM/qtO0ss8z94Hdm+ittrbW6+rqcl3Gjn7xgXCD3Gee3P42+damcJr3tb+EyX46W8Pp3KKSMOnOwdNDS0b3tQx4nZ2dNDQ00Na2h7Ou7SPKy8upqamhtHT7qRnMbL671+7q+3M9UJsflj0Crz4K7/3WjvNuVAyHA08KX6ddnZv6JBYNDQ0MHTqUiRMnYgnPnpYr7s769etpaGhg0qQ9uwdLpw72VlM9PPjlcDbm+Fm5rkYS1NbWxqhRo/I2UADMjFGjRu1Va0wtlT2x5AF4/udhlrCX54TpCC+5K9yqL3ktnwOlx97+jGqp7I7mt+CxG+CXl8ObL4W7ecdNg0//Jdx0J5KgpqYmfvKTn+z295177rk0NTXtesOYqKXSH13tcM9lsPyR8PzIi2Hm/9fgqqSqJ1Q+85nPbLe8q6uLkpKdv5XnzJmTdGnbUaj0x4PXhkD5uy/CwedAzXGJf8yBSG/XXHMNr776KlOnTqW0tJTy8nJGjBjB0qVLeeWVV7jggguor6+nra2Nq666ilmzwhjfxIkTqaurY+vWrcyYMYNTTz2VJ598kvHjx/O73/2OioqKWOtUqOzMhpUw71thguVlc8NVrWd9LddVyQDxzQcW89LazbG+5pRxw/j6+w/f6fobbriBRYsWsWDBAh577DHOO+88Fi1a9PZZmttuu42RI0fS2trKcccdx0UXXcSoUaO2e41ly5Zx991389Of/pSLL76Y+++/n8svvzzWn0Oh0pdMF9z/iXB9yfAJcPSlcOZ1ua5KZDvHH3/8dqd9f/SjH/Gb3/wGgPr6epYtW7ZDqEyaNImpU8OE4cceeyyvvfZa7HUpVPry5I2wpg4+eBsccVGuq5EB6J1aFGkZPHjw248fe+wxHnnkEZ566ikqKys5/fTT+zwtPGjQtiu2i4uLaW1tjb0unf3pre52mPf/wuzuChQZQIYOHcqWLVv6XLdp0yZGjBhBZWUlS5cu5emnn065um3UUsn28HXw1xvh3WfD+3+U62pEtjNq1ChOOeUUjjjiCCoqKhgzZszb66ZPn84tt9zCYYcdxiGHHMKJJ56Yszp170+PZ38Kc74ItR+Hc78LRcXx70P2aUuWLOGwww7LdRmp6Otn1b0/felqh/s/HgZgz/638LESi+4Ps8kvfwQmn6NAEdlLhRMqnW3hStjlD0PVBLjn0rC8akK46e/gc+DC/1SgiOylwgmVv/wgBMr7b4SjLwtTNQ4eDe9+j6ZkFIlRYYTK1nVhwqTDLwwfTwEw7cM5LUkkXxXGn+g//3v4HJwzdUWsSNLyP1Sa34L5t8MxH4FRB+W6GpG8l/+h
svLx8MHiU9XdkX3bnk59APDDH/6QlpaWmCvqW2GEStlQGDs115WI7JV9JVTyf6D2tSfCx17oM4NlH5c99cHZZ5/Nfvvtx7333kt7ezsXXngh3/zmN2lububiiy+moaGBTCbD1772Nd58803Wrl3LGWecQXV1NfPmzUu0zvx+p21eGz7dr+eMj0hc/nhN+EzrOO1/JMy4Yaers6c+mDt3Lvfddx/PPvss7s7MmTN5/PHHaWxsZNy4cfzhD38Awj1BVVVV/OAHP2DevHlUV1fHW3Mf8rv7s/KJ8O+kv89tHSIxmzt3LnPnzmXatGkcc8wxLF26lGXLlnHkkUfy8MMP8+Uvf5knnniCqqqq1GvL75bKysehfDiMOTLXlUi+eYcWRRrcnWuvvZZPfepTO6x7/vnnmTNnDl/96lc566yzuO66dOcCyt+Wyua1sGQ2HHSGrpiVvJA99cE555zDbbfdxtat4SN216xZw7p161i7di2VlZVcfvnlXH311Tz//PM7fG/S8rOl4g6//xfIdMJZmrFN8kP21AczZszgsssu46STTgJgyJAh3HnnnSxfvpyrr76aoqIiSktLufnmmwGYNWsW06dPZ9y4cYkP1Obn1Acv3hfuRn7vt+DkK9MpTPKepj7o39QH+dkv6GgOg7Mn/lOuKxEpOPnZ/Tn2o+GyfH2Mhkjq8rOlAgoUkRzJ31ARScC+Nga5J/b2Z1SoiPRTeXk569evz+tgcXfWr19Pefmef6RvomMqZjYduBEoBn7m7jf0Wn8F8F1gTbTox+7+syRrEtlTNTU1NDQ00NjYmOtSElVeXk5NTc0ef39ioWJmxcBNwNlAA/Ccmc1295d6bfpLd9d5XxnwSktLt/tEQOlbkt2f44Hl7r7C3TuAe4DzE9yfiAwASYbKeKA+63lDtKy3i8xsoZndZ2YHJFiPiKQg1wO1DwAT3f0o4GHgv/vayMxmmVmdmdXle39WZF+X5EDtGiC75VHDtgFZANx9fdbTnwH/3tcLufutwK0AZtZoZqt2se9q4K3dLTgBqmNHA6UW1bGjXdVyYH9eJMlQeQ6YbGaTCGFyCXBZ9gZmNtbdX4+ezgSW7OpF3X30rrYxs7r+3KOQNNWxo4FSi+rYUVy1JBYq7t5lZlcCDxFOKd/m7ovN7Hqgzt1nA58zs5lAF7ABuCKpekQkHYlep+Luc4A5vZZdl/X4WuDaJGsQkXTleqA2KbfmuoCI6tjRQKlFdewollr2uflURGRgy9eWiojkSF6FiplNN7OXzWy5mV2T4n4PMLN5ZvaSmS02s6ui5SPN7GEzWxb9OyKleorN7G9m9vvo+SQzeyY6Lr80s7KU6hgeXdS41MyWmNlJuTgmZvaF6P9lkZndbWblaR0TM7vNzNaZ2aKsZX0eAwt+FNW00MyOSbiO70b/NwvN7DdmNjxr3bVRHS+b2Tm7s6+8CZWse41mAFOAS81sSkq77wL+1d2nACcC/xzt+xrgUXefDDwaPU/DVWx/ev47wH+4+7uBjcDHU6rjRuBBdz8UODqqKdVjYmbjgc8Bte5+BOFM5CWkd0zuAKb3WrazYzADmBx9zQJuTriOh4EjootPXyE6aRL97l4CHB59z0+i91f/uHtefAEnAQ9lPb8WuDZHtfyOcCPly8DYaNlY4OUU9l1D+EU9E/g9YIQLmkr6Ok4J1lEFrCQat8tanuoxYdvtIiMJZzt/D5yT5jEBJgKLdnUMgP8ELu1ruyTq6LXuQuCu6PF27x3CZSEn9Xc/edNSof/3GiXKzCYC04BngDG+7eK+N4AxKZTwQ+BLQHf0fBTQ5O5d0fO0jsskoBG4PeqK/czMBpPyMXH3NcD3gNXA68AmYD65OSY9dnYMcvk7/I/AH+OoI59CJefMbAhwP/B5d9+cvc5D5Cd6qs3M3gesc/f5Se6nn0qAY4Cb3X0a0Eyvrk5Kx2QE4e74ScA4YDA7dgNyJo1jsCtm9hVCF/6uOF4vn0Jll/caJcnMSgmB
cpe7/zpa/KaZjY3WjwXWJVzGKcBMM3uNMNXEmYRxjeFm1nOhY1rHpQFocPdnouf3EUIm7WPyHmCluze6eyfwa8JxysUx6bGzY5D673A0Udr7gA9HAbfXdeRTqLx9r1E0kn8JMDuNHZuZAf8FLHH3H2Stmg18NHr8UcJYS2Lc/Vp3r3H3iYSf/0/u/mFgHvDBtOqIankDqDezQ6JFZwEvkfIxIXR7TjSzyuj/qaeO1I9Jlp0dg9nAR6KzQCcCm7K6SbGzMDPjl4CZ7t7Sq75LzGxQdO/eZODZfr9wkoNkaX8B5xJGsV8FvpLifk8lNGEXAguir3MJ4xmPAsuAR4CRKdZ0OvD76PG7ol+K5cCvgEEp1TAVqIuOy2+BEbk4JsA3gaXAIuAXwKC0jglwN2Esp5PQevv4zo4BYVD9puj390XCGask61hOGDvp+Z29JWv7r0R1vAzM2J196YpaEYlVPnV/RGQAUKiISKwUKiISK4WKiMRKoSIisVKoiEisFCoSCzObambnZj2fGdf0E2b2eTOrjOO1JHm6TkViEV3uXesJfIRtdNtBrbv3+6MszKzY3TNx1yK7ppZKgTGzidGEST+NJi6aa2YVO9n2IDN70Mzmm9kTZnZotPwfogmPXjCzx6PbIq4HPmRmC8zsQ2Z2hZn9ONr+DjO72cyeNrMVZnZ6NGnQEjO7I2t/N1v40LjFZvbNaNnnCDcCzjOzedGyS83sxaiG72R9/1Yz+76ZvQCcZGY3WJg4a6GZfS+ZIyo7SONybX0NnC/CnBpdwNTo+b3A5TvZ9lFgcvT4BMK9RBAuIR8fPR4e/XsF8OOs7337OWGCoHsIl6GfD2wGjiT8UZufVUvP5erFwGPAUdHz14Dq6PE4wv08owl3Qv8JuCBa58DF0eNRhEvMLbtOfSX/pZZKYVrp7guix/MJQbOdaBqHk4FfmdkCwgRCY6PVfwXuMLNPEgKgPx7w8O5+EXjT3V90925gcdb+Lzaz54G/EWYd62vmvuOAxzzcddxzu/7fR+syhDvFIcyb0gb8l5l9AGjZ4ZUkEYl+7o8MWO1ZjzNAX92fIsJERlN7r3D3T5vZCcB5wHwzO3Y39tnda//dQEl0N+wXgePcfWPULSrvx+tma/PZDzErAAAA+UlEQVRoHMXDh9kdT7gr+YPAlYSpICRhaqlInzxMMrXSzP4B3p6U+ejo8UHu/oyHD4ZrJMy9sQUYuhe7HEaYyGmTmY0hzNfaI/u1nwVOM7PqaN7US4E/936xqKVV5eED7b5AmCNXUqCWiryTDwM3m9lXgVLCuMgLwHfNbDJhjOTRaNlq4Jqoq/Tt3d2Ru79gZn8jTFFQT+hi9bgVeNDM1rr7GdGp6nnR/v/g7n3NhTIU+J2ZlUfb/cvu1iR7RqeURSRW6v6ISKzU/RHM7CbCvK3ZbnT323NRj+zb1P0RkVip+yMisVKoiEisFCoiEiuFiojESqEiIrH6X+qDTv0w5RoYAAAAAElFTkSuQmCC\n",
- "text/plain": [
- ""
- ]
- },
- "metadata": {
- "needs_background": "light"
- },
- "output_type": "display_data"
- }
- ],
- "source": [
- "steps = np.arange(1, MAX_ESTIMATORS, 1)\n",
- "srfc = StreamingRFC(n_estimators=1,\n",
- " max_n_estimators=np.max(steps))\n",
- "\n",
- "%time train_scores, test_scores = inc_partial_fit(x, y, srfc=srfc, steps=steps, sample=1)\n",
- "print(f\"With {len(srfc.estimators_)}: {train_scores[-1]} | {test_scores[-1]}\")\n",
- "plot_auc(steps, train_scores, test_scores)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "#### Partial random forest\n",
- "1 estimator per 10 % subset"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 9,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "CPU times: user 20.1 s, sys: 15.6 ms, total: 20.1 s\n",
- "Wall time: 20.5 s\n",
- "With 119: 0.8225306784087263 | 0.6294571314102564\n"
- ]
- },
- {
- "data": {
- "image/png": "iVBORw0KGgoAAAANSUhEUgAAARwAAAELCAYAAAALJznDAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAIABJREFUeJzt3Xl4VdW5+PHvSxISEkJmxjAECLMKyqQ4VlFw7uSIV1uVDlptr7/e6m1rq+3ttff29mpbq1Wv1taBWkeqVBQBZ5mReQhhyABkDoTMOe/vj7XRYwgQIPucnJz38zzn4ez5ZZO8rLX22muJqmKMMaHQLdwBGGOihyUcY0zIWMIxxoSMJRxjTMhYwjHGhIwlHGNMyFjCMcaEjCUcY0zIWMIxxoRMbLgD6CiZmZk6ZMiQcIdhTFRasWJFmapmHW2/LpNwhgwZwvLly8MdhjFRSUR2tmc/q1IZY0LGEo4xJmQs4RhjQqbLtOG0pampicLCQurr68Mdiu8SEhLIzs4mLi4u3KEYc1hdOuEUFhaSnJzMkCFDEJFwh+MbVaW8vJzCwkJycnLCHY4xh9Wlq1T19fVkZGR06WQDICJkZGRERUnORLYunXCALp9sDoqWv6eJbF0+4Rhjjk1LQFlXVM3/fbCdqtrGDj13l27D6Qyqqqp47rnn+O53v3tMx1188cU899xzpKam+hSZiWaqSnVdE8VV9ZTVNFBZ20hNQzNrCqp5a8MeKmubABiamcR5o3p32HUt4fisqqqKP/7xj4cknObmZmJjD3/7582b53doJso0Ngd4f2sp89fvYdHmUkr3NxyyT8/4WM4f3ZvzRvZmytB0+qX06NAYLOH47O6772bbtm2MHz+euLg4EhISSEtLY9OmTWzZsoUrr7ySgoIC6uvrufPOO5k9ezbw+asaNTU1zJw5kzPPPJOPPvqIAQMG8Nprr9GjR8f+IJiuQ1UprKxjdUEVW/fuZ1dFLTsrasnbW8P+hmaSE2I5e0QWEwam0j+1B1nJ8aQldqdnfCzpSd3pHutfS0vUJJz7/rGeDcX7OvScY/r34meXjT3iPg888ADr1q1j9erVLF68mEsuuYR169Z99vj6ySefJD09nbq6OiZNmsRXv/pVMjIyvnCOrVu38vzzz/P4449z1VVX8dJLLzFr1qwO/buYyFW6v4EVOytZU1jF2qJq1hfvo+KAa3vpJtAvpQeDMxK5bHx/po/uw7Thmb4mlSOJmoTTWUyePPkLfWV+97vf8corrwBQUFDA1q1bD0k4OTk5jB8/HoDTTjuNHTt2hCxe0/ns3VdPYWUtG3fv54XlBawprAYgtpswok8y00f3YdyAXowfmMaIvj2Jj40Jc8Sfi5qEc7SSSKgkJSV99n3x4sUsWLCAjz/+mMTERM4999w2+9LEx8d/9j0mJoa6urqQxGrCb399E8t3VrKmoJr8shpWF1Sxs7z2s+0j+yTzoxmjmJyTztj+vUiI6zzJpS1Rk3DCJTk5mf3797e5rbq6mrS0NBITE9m0aROffPJJiKMznYmqsru6nlW7qvhwWxkrdlSypWQ/ByfHHZDagzH9e3HD1MEM692T7NQeDO/dM6L6YFnC8VlGRgbTpk1j3Lhx9OjRgz59+ny2bcaMGTz66KOMHj2akSNHMnXq1DBGakKpsTnAlr37+SCvjPe2lLKnup6S/Q3UNDQDkBwfy6mD05h5Ul8mDUln/MBUkuIj/9dVusrc4hMnTtTWA3Bt3LiR0aNHhymi0Iu2v2+kKatpYPHmUv7xaTEf55fT2BwAYEy/XgzNSiKzZzzDspIYOyCFkwekEBsTOf1yRWSFqk482n6RnzKN6WT21TexraSGoqo6WgLKtpIaFm0uZW2Ra9wdkNqDG6YO5pSBqUwcnEb/1Ojp4mAJx5gToKrUNrawo/wAK3ZW8tb6vXy0rYxAUMWhm8CEQWncNX0EZ4/I4uTslIhq
d+lIviYcEZkBPATEAE+o6gOttg8CngZSvX3uVtV53rZ7gJuBFuAOVZ3vZ6zGtFdLQFm2o4IXlhfw1vq9n7W7AAzJSOQ75w5jwsA0BqYnEhsjZCbFk5Jo4xSBjwlHRGKAh4HpQCGwTETmquqGoN1+Arygqo+IyBhgHjDE+34NMBboDywQkRGq2uJXvMa01tQSoGR/AxU1jZTsr2dXRa17gpRXRvmBRnrGx3LJSf3IyUqiX0oCpw5KIzutR9SWXtrDzxLOZCBPVfMBRGQOcAUQnHAU6OV9TwGKve9XAHNUtQHYLiJ53vk+9jFeY6hpaOaDrWXMX7+Htzd8sfQC0KdXPGfmZjJ9TB++NKo3id2tVeJY+Hm3BgAFQcuFwJRW+/wceEtEvgckARcEHRvcKaXQW2dMh1FVyg80UlxVR0FFHW+sLWbBhhIaWwKk9IjjkpP6MWFQKulJ3clKjqd/ag/69EoId9gRLdzp+Vrgz6r6PyJyOvBXERnX3oNFZDYwG2DQoEE+hXhijnd4CoAHH3yQ2bNnk5iY6ENk0ScQUFYVVLJwUwnLtleyvriaA42f19LTk7pz3ZRBXDS2LxOHpBEXQY+lI4WfCacIGBi0nO2tC3YzMANAVT8WkQQgs53HoqqPAY+B64fTYZF3oMMNT9EeDz74ILNmzbKEc5wCAWVXRS21jS18mFfGUx9up7i6nthuwtgBKXzttGyGZCYxwCu5jO7XK2wvNUYLPxPOMiBXRHJwyeIa4LpW++wCzgf+LCKjgQSgFJgLPCciv8U1GucCS32M1TfBw1NMnz6d3r1788ILL9DQ0MCXv/xl7rvvPg4cOMBVV11FYWEhLS0t/PSnP2Xv3r0UFxdz3nnnkZmZyaJFi8L9V4kITS0BPtpWzpvr9vDOxr2UBI35MiUnnR/NHMW5I3uT0sOeGoWDbwlHVZtF5HZgPu6R95Oqul5E7geWq+pc4C7gcRH5Aa4B+SZ1XZ/Xi8gLuAbmZuC2E35C9c+7Yc/aEzrFIfqeBDMfOOIuwcNTvPXWW7z44ossXboUVeXyyy/nvffeo7S0lP79+/PGG28A7h2rlJQUfvvb37Jo0SIyMzM7Nu4uaE91Pc8u2cnzS3dRVtNIUvcYzh3Zm7NyM0lNjGNQehJj+vc6+omMr3xtw/H61Mxrte7eoO8bgGmHOfY/gP/wM75Qe+utt3jrrbeYMGECADU1NWzdupWzzjqLu+66ix/96EdceumlnHXWWWGONDLsKq/lk+3lvLellDfX7aFFlfNH9ebqSYM4Kzez0785HY3C3WgcOkcpiYSCqnLPPffwrW9965BtK1euZN68efzkJz/h/PPP5957723jDKaxOcDcT4t5fukuVuysBCA1MY4bzxjCjacPYVCGtXd1ZtGTcMIkeHiKiy66iJ/+9Kdcf/319OzZk6KiIuLi4mhubiY9PZ1Zs2aRmprKE0888YVjrUoFu6vrmLO0gOeX7qJkfwPDspK4e+Yozh/Vm2FZPenWzTrbRQJLOD4LHp5i5syZXHfddZx++ukA9OzZk2eeeYa8vDx++MMf0q1bN+Li4njkkUcAmD17NjNmzKB///5R2WisqqzcVcmTH+zgzfV7CKhydm4Wv/l6DmflZlqP3ghkw1N0IV3l76uqLNxUwu8X5rG6oIpeCbFcO3kQs6YOZmC6VZk6IxuewkSU2sZmPi2oZl1RNW9v3MvS7RUMzkjk/ivG8tVTs7vE4FPGEo4JI1Xl/a1lPP5+Ph9tK6fFG9NhQGoP7r9iLNdOHmS9fbuYLp9wVDUq6vqRVjVeV1TN/f/YwNIdFfROjmf22UOZNCSNk7NTyewZf/QTmIjUpRNOQkIC5eXlZGRkdOmko6qUl5eTkND5XyxcsGEvj7+fz5LtFWQkdeeXV47j6xOzO9VUJsY/XTrhZGdnU1hYSGlpabhD8V1CQgLZ2dnhDuOwmloC/GreRp76cAeD0hP5txkjmTV1ML0S7BWDaNKlE05cXNwXJp0z
obe6oIrH3tvGB1vL2FffzDemDeHfLx5tbTNRqksnHBM+eSU1/GHhVl5dXUx6UndmjOvLxSf149yRvcMdmgkjSzimw6wrquallYWsL97Hsh0VxMd249vnDOO284aRbFUngyUc0wH21Tfx87nreWVVEfGx3Rjdrxe3nzecG88YYk+czBdYwjEnpKCilpufXkZ+6QFmnz2U75473MaaMYdlCccctw+2lnHnnFU0tQT4yzcnc8Zwe8nUHJklHHPMGpsD/GHhVn6/KI/hWT159IbTGJbVM9xhmQhgCccck08Lqrjn5bVs2L2Pr5w6gF9cMc7eczLtZj8ppl2qahv59ZubmbNsF5k943nshtO4cGzfcIdlIowlHHNEzS0BXl5ZxK/f3ERVXRPfnJbD9y/Itcfc5rhYwjFtam4J8NrqYn6/cCs7ymuZMCiVv155kg1Ebk6IJRxziEWbS/jFPzaQX3aAMf168dgNpzF9TJ8u/QKsCQ1LOOYz++ubuPvltbyxZjdDs5J4dNZpXDimj40XbDqMJRwDwNa9+/nWMyvYWV7LXdNH8K1zhtkslKbDWcKJcrWNzfx+YR5PvJ9Pr4Q4nrl5CqcPywh3WKaL8jXhiMgM4CHczJtPqOoDrbb/L3Cet5gI9FbVVG9bC3Bwqsxdqnq5n7FGo/e2lPLvr6ylsLKOr52Wzd0zR9m7T8ZXviUcEYkBHgamA4XAMhGZ6822CYCq/iBo/+8BE4JOUaeq4/2KL5qV1zTwi9c38OrqYoZmJfG32VOZMtRKNcZ/fpZwJgN5qpoPICJzgCtw84W35VrgZz7GE/WKqur4w8I8Xl1VRHMgwB1fGs53zxtuU+KakPEz4QwACoKWC4Epbe0oIoOBHGBh0OoEEVkONAMPqOqrfgUaDdYVVXPTU8vYX9/EleMHcOvZOQzvnRzusEyU6SyNxtcAL6pqS9C6wapaJCJDgYUislZVtwUfJCKzgdkAgwYNCl20ESQQUOYsK+A/3thAamJ35txxpiUaEzZ+JpwiYGDQcra3ri3XALcFr1DVIu/PfBFZjGvf2dZqn8eAx8DNvNkhUXchxVV13DlnFct2VDJ1aDoPXTOBPr06/8wOpuvyM+EsA3JFJAeXaK4Brmu9k4iMAtKAj4PWpQG1qtogIpnANOC/fIy1y/l4Wzm3PbeSxuYAv/n6KXz11AHWU9iEnW8JR1WbReR2YD7usfiTqrpeRO4HlqvqXG/Xa4A5+sWZ3EYDfxKRANAN14ZzuMZm08p7W0q59S/LGZieyJ9srBrTiUikzdh4OBMnTtTly5eHO4ywW7hpL995ZiVDs3ry7C1TSE/qHu6QTBQQkRWqOvFo+1nf9S5CVXl+6S5u/csKcvtYsjGdU2d5SmVOwNsb9vK/b29hw+59nDMiiz9ef6qNwmc6JfupjHDPL93FPS+vZVhWEr/+6kl85dRsm9XSdFqWcCLYyysLuefltZw7MotHZ51mPYZNp2f/FUao9cXV3P3yWs4YlmHJxkQMSzgRqPJAI999diXpid35/bUTLNmYiGFVqgizo+wA3/jzMnZX1/PcLVPIsOEkTASxhBNB1hRWcdNTy1BVnrtlChOHpIc7JGOOiSWcCLEkv5ybn15OSo84nrllCjmZSeEOyZhjZgknAmzas4+bn15On17xPHPLFPql9Ah3SMYcF2s07uT2VNfzjaeWkRQfY8nGRDxLOJ1Yyb56rnv8E/bVNfHkTZMs2ZiIZwmnkyqqquO6J5awZ189f/7mZMb2Twl3SMacMGvD6YRW7ark1r+soKGphadumsQkexplughLOJ3Mok0lfOfZFfROTuD5W6eQ28eGAzVdhyWcTqK5JcAzn+zkl29sZGTfZJ7+5mSbI8p0OZZwOoH80hpu/ctytpUe4KzcTB6+/lR6JcSFOyxjOpwlnDA70NDM7L+uoLK2icf/ZSIXjO5tYw+bLssSThipKv/20hryS2t45pYpnDEsM9whGeMreyweRos3l/LGmt3c
deFISzYmKljCCZPmlgD/+c+NDMlI5NazhoY7HGNCwhJOmPx9RSFb9tbwoxmj6B5r/wwmOthPehi8sWY3P5u7nklD0pgxrm+4wzEmZCzhhNhLKwq57bmVnJKdwmM3TLQnUiaq+JpwRGSGiGwWkTwRubuN7f8rIqu9zxYRqQradqOIbPU+N/oZZ6hsK63hJ6+uY+rQdP568xTSbN4oE2V8eywuIjHAw8B0oBBYJiJzg6fsVdUfBO3/PWCC9z0d+BkwEVBghXdspV/x+q2xOcCdc1aRENeNB6+2cYhNdPKzhDMZyFPVfFVtBOYAVxxh/2uB573vFwFvq2qFl2TeBmb4GKvvXl1VxLqiffzqyyfRNyUh3OEYExZ+JpwBQEHQcqG37hAiMhjIARYe67GRIBBQHns/nzH9elkjsYlqnaXR+BrgRVVtOZaDRGS2iCwXkeWlpaU+hXbiFm0uIa+khtlnD7VGYhPV/Ew4RcDAoOVsb11bruHz6lS7j1XVx1R1oqpOzMrKOsFw/aGqPPruNvqnJHDJyf3CHY4xYeVnwlkG5IpIjoh0xyWVua13EpFRQBrwcdDq+cCFIpImImnAhd66iDP302KW7ajkO+cNtzm/TdTz7SmVqjaLyO24RBEDPKmq60XkfmC5qh5MPtcAc1RVg46tEJFf4JIWwP2qWuFXrH6pqm3kF69v4JSBqVw3eVC4wzEm7Hx9W1xV5wHzWq27t9Xyzw9z7JPAk74FFwIPLthKZW0TT39zHDHdrO3GGCvj+6TyQCNzlu3iq6cOsAHQjfFYwvHJc0t3Ud8U4OYz7U1wYw6yhOODxuYAT3+0g7NyMxnZ1wZBN+YgSzg+eHP9Hkr2N3DzmTnhDsWYTsUSjg9eWVlI/5QEzs7tnH2DjAkXSzgdrLymgfe2lnH5+AF0sydTxnxBuxKOiEwVkeSg5V4iMsW/sCLXG2t30xJQrpzQP9yhGNPptLeE8whQE7Rc460zrbyyqohRfZMZ1bdXuEMxptNpb8KRVj2BA9gUM4fYuHsfq3ZVccX4iH2x3RhftTfh5IvIHSIS533uBPL9DCwSPbRgK8nxsfYagzGH0d6E823gDNwb24XAFGC2X0FFovXF1by5fg/fODOHlESbpteYtrSrWqSqJbiXLM1hPLhgK8kJsdb3xpgjaFfCEZGncGMLf4GqfrPDI4pAK3ZW8vaGvdw1fQQpPax0Y8zhtLfh9/Wg7wnAl4Hijg8n8qgqD/xzI5k947n5LCvdGHMk7a1SvRS8LCLPAx/4ElGEeWdjCct2VPLLK8eR2N0e3BlzJMfb0zgX6N2RgUSqR9/dxsD0Hlw9aeDRdzYmyrW3DWc/n7fhKLAX+De/gooUnxZUsXxnJT+9dIwNH2pMO7S3SpXsTU6Xi2vDgTYakaPNUx9up2d8LFdNzA53KMZEhPaWcG4B7sTNnrAamIob9PxL/oXWuZXsq+f1Nbu54fTBJCfYkylj2qO99YA7gUnATlU9Dzclb9WRD+na5m/YS3NArVexMcegvQmnXlXrAUQkXlU3ASP9C6vzW7yphEHpiQzv3TPcoRgTMdr7HLdQRFKBV4G3RaQS2OlfWJ1bfVMLH24r4+qJA20mTWOOQXsbjb/sff25iCwCUoA3fYuqk/skv5z6pgDnjrKeAcYci2Puqaaq7/oRSCRZvLmUhLhunD40I9yhGBNRfO08IiIzRGSziOSJyN2H2ecqEdkgIutF5Lmg9S0istr7HDJFcLi0BJQFG/dyxrBMEuJiwh2OMRHFt774IhIDPAxMxw1psUxE5qrqhqB9coF7gGmqWikiwXWUOlUd71d8x+vZJTsprKzjxxePDncoxkQcP0s4k4E8Vc1X1UZgDnBFq31uBR5W1Ur4bBiMTquspoH/nr+ZM4dnMmNc33CHY0zE8TPhDAAKgpYLvXXBRgAjRORDEflERGYEbUsQkeXe+it9jLPdHlywhfqmFu67Yqw9nTLmOIT79eZY3OsS5+J6Mb8nIiep
ahUwWFWLRGQosFBE1qrqtuCDRWQ23siDgwb52wGvobmF11YXc9kp/RmWZX1vjDkefpZwioDgV6izvXXBCoG5qtqkqtuBLbgEhKoWeX/mA4txvZu/QFUfU9WJqjoxK8vfSefe31LG/vpmLjvFpn8x5nj5mXCWAbkikiMi3XFDlLZ+2vQqrnSDiGTiqlj5IpImIvFB66cBGwij19cUk5oYx5nDM8MZhjERzbcqlao2i8jtwHwgBnhSVdeLyP3AclWd6227UEQ2AC3AD1W1XETOAP4kIgFcUnwg+OlWqNU3tfD2hr1cdkp/G4bCmBPgaxuOqs4D5rVad2/QdwX+1fsE7/MRcJKfsR2LxZtLONDYwqUnW3XKmBNh/123w4KNJaT0iGPq0PRwh2JMRLOEcxSBgLJ4cylnj8gi1qpTxpwQ+w06ivXF+yiraeC8kf4+BTMmGljCOYqFm0oQgXNGWMIx5kRZwjmKRZtLOCU7lYye8eEOxZiIZwnnCMpqGvi0sIrzRtq4N8Z0BEs4R/Dmuj2owkXj+oQ7FGP8V78P9u/19RLhfpeqU3t9TTHDspIY2Sc53KEY07FUoTwPCpbC/t1QsgE2vQHN9ZAy0H0SesGXfgJ9O65LnCWcwyjZX8+S7RXc8aVcezPchJeqlxQ2QtVOqCmB2HhIzISUAZA50v0JrpQSEwfSDaoLoXA5bH8P9q6Fql0Q38tt378XGvd/fo3ETJgwC9KHQtEKd419xRBo7tC/iiWcw/jnWleduuTkfuEOxXQ1qlCRD3vXQUw89B4NeQtg61twoMztk5INPdJAW1zCqNxx5HNmjYaWBnfe1nqkQ//xMOA0aKiBlkYYdj70HgWDz4TUQRCXcOhxPrCEcxhvrN3NiD49GWHVqeikCrXl0LAPmupc1WPXx1C8yiWF2AT3SzzqUhhyJiT3dVWU7kmQOhg04EoUJRvceWLioZf3n9c790PhskOvmTHc/fJrAPauh/pqCDRB9mSY8h3oOw7ScqBnH5c0DpS6UkzxSsh7B+J6wPjrQQRamiF1IPQeA31Phm6do7nWEk4b6ptaWLWrkm9Oywl3KCZUAi3ul3b9K1C6Ecq3uWQTrGdfGHAq5JwNjbWw/V3YfPBVQeGz2a97pLmSRKCp7Wv17AMzfg0DJ7tktmctZE9y525v9T0mFroPhrTBMGQanPG94/lbh5wlnDasL66mqUU5dXBauEMxB7U0w4qnXLVj5EwYcyUkHubdNlVXglj/ChQscVWKnr0hIcVtb6pzn5YG10ZRWwllW+BAidu338lwyjWuPSMh1bV59BsPGcO+mBBUXbWoYAnUlEJmLtRXwe41Lrb0oa6E0bOPa4ytLnSlo5EzID6o5Dxkmn/3rZOxhNOGFTsrATh1kCWcsFN1pYh37ofSTZDU2yWd13/gkkNKtqvOdIt1DaWxCVC03LV5xHR31ZEDJa5qU18NiGuviOvhqjndYl0iGn4+5F7oqkix3dsXm4h7gtPepziZucd7F7oMSzhtWLmzikHpiWQlW+/ikGvY7xJK3juupHCgFGr2QPowuPoZlxAKl8POD6FyO1QXQc1e1+4RaIGmWkgbAuf8CEZd8nmpxnQKlnBaUVVW7Kpk2jCb5C6k9u+FBT+D9a9Cc52rygyc7EoPg8+AU6517RYAAye5j4k4lnBaKayso3R/A6dZ+01o1FbAmr/B4gdcu8qEWXDS12DgFOhmEw12NZZwWlm5y7XfTLD2m463rxg+fMh1YGuscY+Lq4vc05zB0+Cyh6ydo4uzhNPKB1vLSOoew6i+1v/muBUshZ0fubaV5npoboT9xW5doAX6T3BtK+lDYexXYNxXOrT7vOm8LOEEqWlo5o21u7nkpH42ut/xKN0M838MeW+75bgk72lQHCT3c+0wZ37fNeqaqGQJJ8gba4qpbWzh6kkDj76zcRr2u16xeQtcdSmuB1xwH5x2E/RIDXd0ppOxhBPkb8sKGJqVZA3GR7L9fVj9nOvYtq/Y
vWHc0uC2jfsazHgAetroiKZtlnA8eSU1rNxVxb9fPMreDm8tEICdH8CyJ2DDa679pbnBlWZOuxGGX+B61KZaydAcmSUcz7tbSgGie+4pVdjxgeuuX1vhut/XVcCav8O+Quie7DrUnfkD16NXtdO8FGgig68JR0RmAA/hZt58QlUfaGOfq4Cf4958+1RVr/PW3wj8xNvtl6r6tJ+xfpJfzuCMRPqn9vDzMp3Dzo9d57rBZ8LKp11nuwEToGTT5w2+B0mM6/Y//T7Xczcu6P5YSdAcI98SjojEAA8D04FCYJmIzA2esldEcoF7gGmqWikivb316cDPgIm4RLTCO7bSj1gDAWXp9gpmjO3rx+k7l9XPwWu3uVcBYnu4xJM+DD5Z4pLJRb+Ck69xbzw31gBqrweYDuNnCWcykKeq+QAiMge4AgieI/xW4OGDiURVS7z1FwFvq2qFd+zbwAzgeT8C3bB7H9V1TUwd1oVn1qytgPf/Bz7+A+ScA6f+i3uylDvd9YVpqnP7dU/8/JiEXuGJ1XRZfiacAUBB0HIhMKXVPiMARORDXLXr56r65mGOHdD6AiIyG5gNMGjQoOMO9JP8cgCmDo3Q96eaG9wLjcUrXR+XtBwo2+w62WWOcA29Sx93JZYJN8DFv3FvTJ/0tc/PEZxojPFJuBuNY4Fc4FwgG3hPRNrd5VRVHwMeA5g4caIebxCf5FcwJCORfikR2H6z82N46RbXqHtYAmOvhHPudsNKGhMmfiacIiD4OWm2ty5YIbBEVZuA7SKyBZeAinBJKPjYxX4E6dpvypk5LsLGLm5pclWkd3/thrS86q/uhceqnW5oy6yRbnyYvevd4FFZI8IdsTG+JpxlQK6I5OASyDXAda32eRW4FnhKRDJxVax8YBvwKxE52APvQlzjcofbu7+effXNjMuOoIbRiu3w0s1udP2Tr3ZVpIPtLcl93LAOB/UZG54YjWmDbwlHVZtF5HZgPq595klVXS8i9wPLVXWut+1CEdkAtAA/VNVyABH5BS5pAdx/sAG5o20vPQDA0MwkP07f8bYucMkGha895V58NCZC+NqGo6rzgHmt1t0b9F2Bf/U+rY99EnjSz/gA8stcwsmJhISz+1N4/mo3JcjVf3E1kh2xAAANm0lEQVRvWxsTQcLdaBx2O8oOkBDXjb69QjMvz3FrboBXvu0mLLtx7uEHEDemE4v6hLO97ABDMpLo1q0T95pVhbfvdQOBX/+iJRsTsSzhlB1gVL8wD7bV0uymcu3Zx40dU1vhph4pXuUagItWwpJH3WRoudPDG6sxJyCqE05TS4BdFbXMPClErzTkveMmT9OA6xPTVAdzb3dDPjQdAMTNGd1cf+ixp1zrXjswJoJFdcIprKyjOaDkZPb0/2Kb5sGca6FbnDdf9PtuSpOqAjj1Bsga5SaQbzoAyf3dtK79xn8+r/SUb9ub2SbiRXXC2V5WA4TgCZWq66SXOhhuWwL578KL33CTsN3wspsG5XBGX+pvbMaEUFQnnPxQ9cHZ8b6bDfKS37o3skfOgG9/4HoCp9v85SZ6RHXC2V52gJQecaQltXNq1+OhCu/9t5uidvz1n6/PGObfNY3ppKK6USC/9ID/1amPfufaYc7+oXtD25goFt0Jp6yGYVk+NhhvWwQLfg5jroTJt/p3HWMiRNQmnP31Tezd18Cw3j6VcEo2wgs3QuZIuOJhG47TGKI44RxsMPalhFNTAs9+3VWhrn8B4kPw2N2YCBC1jcbbSt0jcV8Szke/d3M23foOpB7/SITGdDVRXcKJ6SYMSu/goTWb6mDVX90MB/0ndOy5jYlwUZtwtpXWMDg9ke6xHXwL1r8CdZUw6ZaOPa8xXUBUJ5yhflSnlj3hBi7PObvjz21MhIvKhNMSUHaU1Xb8E6qdH7thPyfdak+ljGlDVCacwspaGlsCHd9g/N5/QVIWTJjVsec1pouIyoTz+ROqDizhFK6AbQvh9NttjidjDiMqH4t//tJm
B5Rwmupg3cvw4UNuetxJN5/4OY3poqIy4eyqqCU5IZbUxLhjOzAQgE+fh8X/6Wa4PPkqN+xE5Q43P/flf4D4MI8eaEwnFpUJp6CiloFpicixNuz+4w7Xx6b/BCjdDHO/52ZOuOEVGHqeNRQbcxTRmXAq6469/WbHhy7ZTL0NLvwlNNfBrk/c4FlxEThFsDFh4GujsYjMEJHNIpInIne3sf0mESkVkdXe55agbS1B6+d2VEyqSmGlK+G0W0szzPshpAyEL/3EDfXZPQmGn2/Jxphj4FsJR0RigIeB6bg5xJeJyFxV3dBq17+p6u1tnKJOVcd3dFylNQ3UNwUYeCyvNCx5BErWu/m77QmUMcfNzxLOZCBPVfNVtRGYA1zh4/XapaCiFqD971CVboGFv4QRM2H0ZT5GZkzX52cbzgCgIGi5EJjSxn5fFZGzgS3AD1T14DEJIrIcaAYeUNVXOyKogoo6AAamH6UqNP/HsGcN7Nvtqk2XPWSNwsacoHB3/PsHMERVTwbeBp4O2jZYVScC1wEPisghgwCLyGwRWS4iy0tLS9t1wYMlnOwjteHUlMInf4SyPKgtd8kmuU97/07GmMPws4RTBAwMWs721n1GVcuDFp8A/itoW5H3Z76ILAYmANtaHf8Y8BjAxIkTtT1BFVTWkpUcT0JczOF32vS6m6zu+r+7+aGMMR3CzxLOMiBXRHJEpDtwDfCFp00i0i9o8XJgo7c+TUTive+ZwDSgdWPzcSmoqGNg2lGqUxtecx35+oztiEsaYzy+lXBUtVlEbgfmAzHAk6q6XkTuB5ar6lzgDhG5HNdOUwHc5B0+GviTiARwSfGBNp5uHZddFbVMGpJ2+B1qK9wsC9PutDYbYzqYrx3/VHUeMK/VunuDvt8D3NPGcR8BJ3V0PE0tAXZX1zEwfcDhd1rzgpuKd0zYH6gZ0+VEVU/j3VX1BJS2O/0FAvDuA/Duf0H2JOh3SugDNKaLC/dTqpBK6N6N71+Qy4RBqYdu3PAKvPtrOOVa+Je5Vp0yxgdRVcLpnZzA9y8Y0fbGlX9xMyxc8bB7dcEY0+HsNwugcifkvwvjZ1myMcZH9tsFsPo59+f4a8MbhzFdXFRVqQ5RudM9lVr2OAw91yatM8Zn0Ztw9q6Hpy9zry70nwAX/CzcERnT5UVnwinf5pJNTDzcvhwyc8MdkTFRIToTzpJHofEAfOdtyDjknVBjjE+ir9FYFbbMh5xzLNkYE2LRl3DKtkDVThhxYbgjMSbqRF/C2TLf/Zl7UXjjMCYKRV/C2foW9B4LqQOPvq8xpkNFV8Kpr4ZdH1t1ypgwia6EU1MCAya6AdGNMSEXXY/FM3Ph5vnhjsKYqBVdJRxjTFhZwjHGhIwlHGNMyFjCMcaEjCUcY0zIWMIxxoSMJRxjTMhYwjHGhIyotmtK7k5PREqBnUfZLRMoC0E4R9NZ4oDOE4vFcajOEkt74hisqllHO1GXSTjtISLLVXWixfG5zhKLxXGozhJLR8ZhVSpjTMhYwjHGhEy0JZzHwh2Ap7PEAZ0nFovjUJ0llg6LI6racIwx4RVtJRxjTBhFRcIRkRkisllE8kTk7hBfe6CILBKRDSKyXkTu9Nani8jbIrLV+zMtRPHEiMgqEXndW84RkSXevfmbiHQPQQypIvKiiGwSkY0icnoY78cPvH+XdSLyvIgkhOqeiMiTIlIiIuuC1rV5H8T5nRfTGhE51ec4/tv791kjIq+ISGrQtnu8ODaLyDENDt7lE46IxAAPAzOBMcC1IjImhCE0A3ep6hhgKnCbd/27gXdUNRd4x1sOhTuBjUHLvwb+V1WHA5XAzSGI4SHgTVUdBZzixRPy+yEiA4A7gImqOg6IAa4hdPfkz8CMVusOdx9mArneZzbwiM9xvA2MU9WTgS3APQDez+41wFjvmD96v2Pto6pd+gOcDswPWr4HuCeM8bwGTAc2A/28df2AzSG4djbuh/hLwOuA4Dp0xbZ1r3yKIQXYjtd+
GLQ+HPdjAFAApONGv3wduCiU9wQYAqw72n0A/gRc29Z+fsTRatuXgWe971/4/QHmA6e39zpdvoTD5z9UBxV660JORIYAE4AlQB9V3e1t2gP0CUEIDwL/BgS85QygSlWbveVQ3JscoBR4yqvaPSEiSYThfqhqEfAbYBewG6gGVhD6exLscPchnD/H3wT+2RFxREPC6RREpCfwEvB9Vd0XvE3dfxW+Pi4UkUuBElVd4ed12iEWOBV4RFUnAAdoVX0Kxf0A8NpHrsAlwf5AEodWLcImVPfhSETkx7hmgWc74nzRkHCKgOBJqLK9dSEjInG4ZPOsqr7srd4rIv287f2AEp/DmAZcLiI7gDm4atVDQKqIHBxMPxT3phAoVNUl3vKLuAQU6vsBcAGwXVVLVbUJeBl3n0J9T4Id7j6E/OdYRG4CLgWu95LfCccRDQlnGZDrPXnojmvwmhuqi4uIAP8HbFTV3wZtmgvc6H2/Ede24xtVvUdVs1V1CO4eLFTV64FFwNdCGMceoEBERnqrzgc2EOL74dkFTBWRRO/f6WAsIb0nrRzuPswF/sV7WjUVqA6qenU4EZmBq35frqq1reK7RkTiRSQH14i9tN0n9rthrjN8gItxLe3bgB+H+Npn4orFa4DV3udiXPvJO8BWYAGQHsKYzgVe974P9X5g8oC/A/EhuP54YLl3T14F0sJ1P4D7gE3AOuCvQHyo7gnwPK7tqAlX8rv5cPcB18D/sPczvBb3ZM3POPJwbTUHf2YfDdr/x14cm4GZx3It62lsjAmZaKhSGWM6CUs4xpiQsYRjjAkZSzjGmJCxhGOMCRlLOMaYkLGEY3wnIuNF5OKg5cs7apgQEfm+iCR2xLmM/6wfjvGd10V+oqre7sO5d3jnbvd0KiISo6otHR2LOTor4ZjPiMgQb0Csx71Bqd4SkR6H2XeYiLwpIitE5H0RGeWt/7o3mNWnIvKe9zrJ/cDVIrJaRK4WkZtE5A/e/n8WkUdE5BMRyReRc70BoTaKyJ+DrveIiCz34rrPW3cH7qXLRSKyyFt3rYis9WL4ddDxNSLyPyLyKXC6iDwgblC0NSLyG3/uqDlEKLqP2ycyPrgxUZqB8d7yC8Csw+z7DpDrfZ+CezcLXLf7Ad73VO/Pm4A/BB372TJu8Kc5uK77VwD7gJNw/xmuCIrlYBf/GGAxcLK3vAPI9L73x70flYV7K30hcKW3TYGrvO8ZuG75Ehynffz/WAnHtLZdVVd731fgktAXeENtnAH8XURW4waH6udt/hD4s4jciksO7fEPdb/5a4G9qrpWVQPA+qDrXyUiK4FVuNHm2hq1cRKwWN3b3weHVDjb29aCe2Mf3Lg39cD/ichXgNpDzmR8EXv0XUyUaQj63gK0VaXqhhukanzrDar6bRGZAlwCrBCR047hmoFW1w8Asd5byf8PmKSqlV5VK6Ed5w1Wr167jao2i8hk3NvhXwNuxw3XYXxmJRxzzNQNILZdRL4Onw3wfYr3fZiqLlHVe3Ej+w0E9gPJJ3DJXriBuqpFpA9ufN+Dgs+9FDhHRDK9cXavBd5tfTKvhJaiqvOAH+DGVTYhYCUcc7yuBx4RkZ8Acbh2mE+B/xaRXFybzDveul3A3V716z+P9UKq+qmIrMINI1GAq7Yd9BjwpogUq+p53uP2Rd7131DVtsaySQZeE5EEb79/PdaYzPGxx+LGmJCxKpUxJmSsSmWOSEQexo3zG+whVX0qHPGYyGZVKmNMyFiVyhgTMpZwjDEhYwnHGBMylnCMMSFjCccYEzL/H/aPYLaF441KAAAAAElFTkSuQmCC\n",
- "text/plain": [
- ""
- ]
- },
- "metadata": {
- "needs_background": "light"
- },
- "output_type": "display_data"
- }
- ],
- "source": [
- "steps = np.arange(1, MAX_ESTIMATORS, 1)\n",
- "srfc = StreamingRFC(n_estimators_per_chunk=1,\n",
- " max_n_estimators=np.max(steps))\n",
- "\n",
- "%time train_scores, test_scores = inc_partial_fit(x, y, srfc=srfc, steps=steps, sample=0.1)\n",
- "print(f\"With {len(srfc.estimators_)}: {train_scores[-1]} | {test_scores[-1]}\")\n",
- "plot_auc(steps, train_scores, test_scores)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "#### Partial random forest\n",
- "3 estimators per 10 % subset, but /3 fewer steps"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 10,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "CPU times: user 7.44 s, sys: 31.2 ms, total: 7.47 s\n",
- "Wall time: 7.56 s\n",
- "With 120: 0.8246353545629453 | 0.6307688947683706\n"
- ]
- },
- {
- "data": {
- "image/png": "iVBORw0KGgoAAAANSUhEUgAAARwAAAELCAYAAAALJznDAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAIABJREFUeJzt3Xl8VOXZ//HPRQgJYcvGHiABwyIgIAFZ3BEFN7RaxaXV1oqtj9Xa1qqP1lbbPrW/Pm3Vx6UqVVurohUXVCq4gFpFIEH2fQkkAUJMQkiArHP9/rgPMIYtQM7MJHO9X695ZebMmZkrh8mX+9znPvcRVcUYY0KhRbgLMMZEDwscY0zIWOAYY0LGAscYEzIWOMaYkLHAMcaEjAWOMSZkLHCMMSFjgWOMCZmW4S6gsaSmpmp6enq4yzAmKuXk5Hytqh2Ptl6zCZz09HSys7PDXYYxUUlENjdkPdulMsaEjAWOMSZkLHCMMSHTbPpwDqWmpob8/HwqKyvDXYrv4uPjSUtLIzY2NtylGHNYzTpw8vPzadeuHenp6YhIuMvxjapSXFxMfn4+GRkZ4S7HmMNq1rtUlZWVpKSkNOuwARARUlJSoqIlZ5q2Zh04QLMPm32i5fc0TVuzDxxjzLGprKkjZ3Mpz3++iZLd1Y363s26DycS7Ny5k5dffplbb731mF534YUX8vLLL5OYmOhTZcbA7qpa1haWs3LbLpbll7Ekv4y1heXUBdxc5+kpbTinf6dG+zwLHJ/t3LmTJ5988qDAqa2tpWXLw2/+mTNn+l2aiSLVtQFyi3eztrCctdvLWb29nDWF5Wwu3rN/nQ6tYzklrQPj+vdhcFoHhqQl0qVDfKPWYYHjs3vuuYcNGzYwdOhQYmNjiY+PJykpidWrV7N27Vouu+wy8vLyqKys5I477mDKlCnAgVM1KioqmDhxIqeffjpffPEF3bt35+2336Z169Zh/s1MpKqqrWNJXhnzNxazYusu1u0oJ7d4z/5WSwuB9NQ2DOrWgStOTaNfl3YM6NKeHsmtfe8LjJrAefCdFazcuqtR3/Pkbu351SUDj7jOww8/zPLly1m8eDFz587loosuYvny5fsPXz/33HMkJyezd+9eRowYwRVXXEFKSso33mPdunW88sorPPvss1x11VVMnz6d66+/vlF/F9N0qSpf5e3ki/VfM29jMTmbS6msCSACGSltyOzclomDupLZuS2ZndrRu2Mb4mNjwlJr1AROpBg5cuQ3xso89thjvPnmmwDk5eWxbt26gwInIyODoUOHAjB8+HByc3NDVq+JXOsKy3lrcQFvL95KfuleAAZ0bc81I3syuncKIzOSSUxoFeYqvylqAudoLZFQadOmzf77c+fO5cMPP2TevHkkJCRw9tlnH3IsTVxc3P77MTEx7N27NyS1msgSCCjrdlTwydodvL14Kyu27qKFwOmZHfnp+L6c068TSW0iK2Dqi5rACZd27dpRXl5+yOfKyspISkoiISGB1atX8+WXX4a4OhPJVF3AfLmx2LuV7D9MPSStA7+65GQuPqUbHdvFHeWdIocFjs9SUlIYO3YsgwYNonXr1nTu3Hn/cxMmTOCvf/0rAwYMoF+/fowaNSqMlZpwq6kLsLygjIW5JSzMLSU7t4TSPTUAdE9szTn9OjGqdzKjeqfQIzkhzNUeH2ku1xbPysrS+hNwrVq1igEDBoSpotCLtt+3qVJVdpRXuUPUhRWs3+F+rty6i701dQCkpySQlZ7MyPRkRvdJIS3J/yNIJ0JEclQ162jrWQvHGJ9V1daxvKCM7NxSsjeXsmhzKcVBI3iTEmLJ7NyOq0f0YER6MiPSk+jUvnHHv0QKCxxjGpGqkl+6l8V5O/fflhWUUV0bACAj1Y3cHdy9A5md29K3cztS2rSK6NZLY7LAMeYEVNcGWFawk/mbSli4qYQl+WX7O3bjWrZgYLf23DC6F8N7
JTO8V1KT6uD1gwWOMcegLqAsztvJZ+uKWLCphEVb3CA7gD4d2zCufyeG9EhkaI9E+nVpR2yMnR8dzNfAEZEJwKNADDBVVR+u93xP4O9AorfOPao603vuXuAmoA64XVVn+VmrMYezq7KGz9Z+zUerC5m7poiS3dWIwMneILvTMpLJSk8mtW10t14awrfAEZEY4AlgPJAPLBSRGaq6Mmi1+4HXVPUpETkZmAmke/cnAwOBbsCHItJXVev8qteYYIGA8um6Il6ct5lP1hZRG1ASE2I5u29Hzh3QmTMzUyNuFG9T4GcLZySwXlU3AojINGASEBw4CrT37ncAtnr3JwHTVLUK2CQi6733m+djvb443ukpAB555BGmTJlCQkLTHHPRFJXtreH1nHxenJdLbvEeUtvGcdMZGYwf0JmhPRJpabtIJ8TPwOkO5AU9zgdOq7fOr4HZIvJjoA1wXtBrg4fd5nvLvkFEpgBTAHr27NkoRTe2w01P0RCPPPII119/vQWOzwIBZUFuCW995c5L2ltTx/BeSdw5vi8TB3WlVUsLmcYS7k7ja4AXVPVPIjIaeFFEBjX0xar6DPAMuIF/PtV4QoKnpxg/fjydOnXitddeo6qqissvv5wHH3yQ3bt3c9VVV5Gfn09dXR2//OUvKSwsZOvWrZxzzjmkpqYyZ86ccP8qzc6qbbt4a3EB7yzeytayShJaxXDxKV25YUw6g7p3CHd5zZKfgVMA9Ah6nOYtC3YTMAFAVeeJSDyQ2sDXHpt/3wPbl53QWxyky2CY+PARVwmenmL27Nm8/vrrLFiwAFXl0ksv5dNPP6WoqIhu3brx3nvvAe4cqw4dOvDnP/+ZOXPmkJqa2rh1R6maugDZuaXMXbuDOat3sLawgpgWwpmZqdw9sT/jT+5MQqtw/x/cvPm5dRcCmSKSgQuLycC19dbZAowDXhCRAUA8UATMAF4WkT/jOo0zgQU+1hoSs2fPZvbs2QwbNgyAiooK1q1bxxlnnMHPfvYz7r77bi6++GLOOOOMMFfaPNQFlA1FFeRsLmXumh18vr6YiqpaWrYQstKTeGjSQC4a3JUUO7oUMr4FjqrWishtwCzcIe/nVHWFiDwEZKvqDOBnwLMicieuA/lGdSd3rRCR13AdzLXAf53wEaqjtERCQVW59957ueWWWw56btGiRcycOZP777+fcePG8cADD4ShwqatqLyK+ZuKWZK3kyX5ZawoKGN3tfvadOsQzyVDunF2v46MPSmVtnHWkgkHX7e6N6ZmZr1lDwTdXwmMPcxrfwf8zs/6QiF4eooLLriAX/7yl1x33XW0bduWgoICYmNjqa2tJTk5meuvv57ExESmTp36jdfaLtWh1QWUpfk7mbOmiLlrdrA0vwyAVi1bcHLX9lw5PI0hPRIZ0iOR3qltoub0gUhmMe+z4OkpJk6cyLXXXsvo0aMBaNu2Lf/85z9Zv349d911Fy1atCA2NpannnoKgClTpjBhwgS6detmncaeuoDy5cZi3l5cwIerdlCyu5oWAsN6JvHz8/tyZt+O9O/S3o4sRSibnqIZaa6/r6qyrKCMtxdv5Z0lW9lRXkXbuJaMG9CJc/t34szMjhE/011zZ9NTmCavLqDMXLaNxz9ez5rCclrFtODsfh2ZNLQ74wZ0CttE4Ob4WeCYiLMvaB77aB3rdlSQ2aktv//WYC4c1JUOCbHhLs+cgGYfOKoaFZ2FzWHXuLYuwMzl2/m/oKD5v2uGceHgrsS0aP7/htGgWQdOfHw8xcXFpKSkNOvQUVWKi4uJj2+as8SV7alh2sIt/P2LXLaWVVrQNGPNOnDS0tLIz8+nqKgo3KX4Lj4+nrS0tHCXcUzW76jghS82MT2ngL01dYzuncKvLx3IuAGdLWiaqWYdOLGxsd+46JyJDKu27eKxj9bx7+XbaRXTgklDu/G9sRmc3K390V9smrRmHTgmsqze7oJm5rLttI1ryY/PPYkbxqTbxFVRxALH+G7N9nIe
+2gd7y3btj9objo9wyawikIWOMY3wS2aNq1iuO2ck/jBGRY00cwCxzS6Q+06WYvGgAWOaUS7Kmt4cMZKpi/Kt6Axh2SBYxrFgk0l3PnqYrbvquSHZ/Xhh2f1tqAxB7HAMSekpi7AIx+u5am5G0hLSuBfPxzNqT2Twl2WiVAWOOa4bSyq4CevLmZpfhlXZaXxwCUDbWIrc0T27TDHrKKqlifnrGfqZ5tIiIvhr9efyoRBXcNdlmkCLHBMgwUCyvRF+fy/WWsoKq/i8mHduWdifzq3b5rncJnQs8AxDZKdW8JD765kaX4ZQ3sk8vR3hltfjTlmFjjmsAIB5aPVO3j2s40s2FRCl/bx/OXqIUwa0p0WdnKlOQ4WOOYge6vrmL4on+f+s4mNX++mW4d47rtwANeN6mnXbTInxL49Zj9V5Y1FBfz2vZWU7qnhlLQOPHbNMCYO6kKsXVPbNAILHANAye5q/vuNZby/Yjsj0pO464L+jEhPatYTl5nQ8zVwRGQC8CjuQnhTVfXhes//BTjHe5gAdFLVRO+5OmDftXm3qOqlftYazeas3sFdry9l194a/vvC/tx0em+bAMv4wrfAEZEY4AlgPJAPLBSRGd7F7wBQ1TuD1v8xMCzoLfaq6lC/6jOwp7qW3723ipfmb6F/l3a8eNNIBnS1SbCMf/xs4YwE1qvqRgARmQZMwl2+91CuAX7lYz0mSO7Xu7nlxRzW7ihnypm9+en4vnbZFeM7PwOnO5AX9DgfOO1QK4pILyAD+DhocbyIZOOuLf6wqr7lV6HR5uPVhdwxbTExLYS/f28kZ/btGO6STJSIlE7jycDrqloXtKyXqhaISG/gYxFZpqobgl8kIlOAKQA9e/YMXbVNVCCgPPbxOh75cB0Du7Xnr9cPp0dyQrjLMlHEz2OdBUCPoMdp3rJDmQy8ErxAVQu8nxuBuXyzf2ffOs+oapaqZnXsaP9LH0nZ3hpu/kc2j3y4jm8N6870H42xsDEh52cLZyGQKSIZuKCZDFxbfyUR6Q8kAfOCliUBe1S1SkRSgbHA//Ox1mbti/Vfc9frSyncVclDkwbynVG97HC3CQvfAkdVa0XkNmAW7rD4c6q6QkQeArJVdYa36mRgmn7z0pEDgKdFJIBrhT0cfHTLNMzuqloe/vdqXvxyM71T2/DqLaMZ3svOfzLhI83hErEAWVlZmp2dHe4yIsa8DcX8YvoS8kv3ctPYDH5+QT87CmV8IyI5qpp1tPUipdPYNJLq2gD/M3MVL3yRS3pKAq/dMpoR6cnhLssYwAKnWSkqr+LWl3JYmFvKjWPSuXtCf1q3slaNiRwWOM3E0vydTPlHDmV7a3j82mFcfEq3cJdkzEEscJqBNxblc88by+jYNo7pPxpj1+g2EcsCpwkLBJT/mbmKqf/ZxOjeKTxx3akkt7FLs5jIZYHTRAUCyt3Tl/KvnHxuHJPOfRcNsDlrTMSzwGmCAgHlnjdc2NwxLpM7x/cNd0nGNIj9l9jEBALKvW8s47XsfG63sDFNjAVOExIIKP/95jJezc7j9nNP4s7zMsNdkjHHxAKniQgElPveWsa0hXncds5J3Dm+r50PZZocC5wm4g+zVvPKgjz+65w+/Ox8CxvTNFngNAEvz9/C059s5PpRPfn5+f0sbEyTZYET4T5dW8Qv317O2f068utLBlrYmCbNAieCrdlezq0vLSKzU1sev/ZUWto4G9PE2Tc4Qu3YVcn3X1hIm7gYnv/eCNrG2ZAp0/TZtzgC7amu5Qf/yKZ0TzWv3TKarh1ah7skYxqFtXAizJ7qWqb8I4flBWX83zXDGNS9Q7hLMqbRWAsngpRX1nDTC9lkby7hj1cOYdyAzuEuyZhGZYETIcr21HDD8wtYXlDGY9fYfDamebLAiQDFFVV8528LWL+jgievO5XzB3YJd0nG+MICJ8x27Krkuqnz2VKyh2dvyOIsuwqmacYscMKobG8N102dT8HOvTz/vRGM6ZMa7pKM8ZUd
pQqT6toAt76UQ27xbqZ+N8vCxkQFa+GEgapy/1vL+Hx9Mf/77SGMOcnCxkQHX1s4IjJBRNaIyHoRuecQz/9FRBZ7t7UisjPouRtEZJ13u8HPOkPtybkb3ARa557ElcPTwl2OMSHjWwtHRGKAJ4DxQD6wUERmBF+yV1XvDFr/x8Aw734y8CsgC1Agx3ttqV/1hsqMJVv546w1XDa0m83WZ6KOny2ckcB6Vd2oqtXANGDSEda/BnjFu38B8IGqlngh8wEwwcdaQyI7t4Sf/2sJIzOS+cOVp9iZ3ybq+Bk43YG8oMf53rKDiEgvIAP4+Fhf21Tsqqzhh//MIS2xNc98ZzhxLe2KmCb6RMpRqsnA66padywvEpEpIpItItlFRUU+ldY4/jp3A19XVPPo5GEkJti1o0x08jNwCoAeQY/TvGWHMpkDu1MNfq2qPqOqWaqa1bFj5A6Y27pzL3/7zyYuG9qNwWl2MqaJXn4GzkIgU0QyRKQVLlRm1F9JRPoDScC8oMWzgPNFJElEkoDzvWVN0p9mr0WBn1/QL9ylGBNWvh2lUtVaEbkNFxQxwHOqukJEHgKyVXVf+EwGpqmqBr22RER+gwstgIdUtcSvWv20cusu3vgqnyln9CYtKSHc5RgTVhL0d96kZWVlaXZ2drjLOMh3/jafZQVlfHLXOXRoHRvucozxhYjkqGrW0daLlE7jZumTtUV8tu5rfnxupoWNMVjg+KYuoPx+5ip6JifwnVG9wl2OMRHBAscn0xfls3p7Ob+Y0I9WLW0zGwMWOL7YU13Ln2avYWiPRC4a3DXc5RgTMSxwfPDYR+sp3FXF/RcNsNMXjAnSoMARkVEi0i7ocXsROc2/spquNdvLmfrZRq7KSiMrPTnc5RgTURrawnkKqAh6XOEtM0ECATfPTbv4ltwzcUC4yzEm4jQ0cKTewLwANnnXQV5flM/C3FLunTiA5DZ2vpQx9TU0cDaKyO0iEuvd7gA2+llYU1Oyu5rfz1zFiPQkm1TLmMNoaOD8EBiDO4EyHzgNmOJXUU3Rw/9eRXllLb+9bDAtWlhHsTGH0qDdIlXdgTvnyRzCwtwSXsvO55azetOvS7ujv8CYKNWgwBGR53FTfX6Dqn6/0StqYmrqAtz35jK6J7bmjnGZ4S7HmIjW0I7fd4PuxwOXA1sbv5ym5+X5W1hbWMGz380ioZX1oxtzJA3dpZoe/FhEXgH+40tFTUhlTR1Pzl3PyPRkzhvQKdzlGBPxjnekcSYQ9X9hry7Mo3BXFT85L9NGFBvTAA3twynnQB+OAoXAL/wqqikIbt2M7pMS7nKMaRIaukvVzrtWVCauDwcO0YkcTfa1bv5y1VBr3RjTQA1t4fwAuAM3mfliYBRuDuJz/Sstclnrxpjj09A+nDuAEcBmVT0Hd4XMnUd+SfNlfTfGHJ+GBk6lqlYCiEicqq4GovISBPtaNyPSk6x1Y8wxaujAkXwRSQTeAj4QkVJgs39lRa59rZs/W9+NMcesoZ3Gl3t3fy0ic4AOwPu+VRWhgls3Y6x1Y8wxO+ahsar6iR+FNAX/ysm31o0xJ8CmGD0Gr+fkM6Bre2vdGHOcfA0cEZkgImtEZL2I3HOYda4SkZUiskJEXg5aXicii73bQZcIDrUtxXtYkreTSUO7WevGmOPk29mGIhIDPAGMx82hs1BEZqjqyqB1MoF7gbGqWioiwadL7FXVoX7Vd6zeWerOVb34FLsKgzHHy88WzkhgvapuVNVqYBowqd46NwNPqGop7J93JyK9s2Qrw3sl2fXBjTkBfgZOdyAv6HG+tyxYX6CviHwuIl+KyISg5+JFJNtbfpmPdR7V2sJyVm8v5xJr3RhzQsI9gUtL3PlZZ+NOm/hURAar6k6gl6oWiEhv4GMRWaaqG4JfLCJT8KY67dmzp29FvrNkKy0ELrTAMeaE+NnCKQB6BD1O85YFywdmqGqNqm4C1uICCFUt8H5uBObiTqf4BlV9RlWzVDWrY8eOjf8b
uM/gnSVbGd0nhU7t4o/+AmPMYfkZOAuBTBHJEJFWuDmR6x9tegvXukFEUnG7WBtFJElE4oKWjwVWEgbLC3aRW7yHS07pFo6PN6ZZ8W2XSlVrReQ2YBYQAzynqitE5CEgW1VneM+dLyIrgTrgLlUtFpExwNMiEsCF4sPBR7dCacaSAmJjhAmDuoTj441pVnztw1HVmcDMesseCLqvwE+9W/A6XwCD/aytIQIB5d2l2zgzsyOJCXZhO2NOlI00PoKcLaVsK6vkkiG2O2VMY7DAOYIZi7cSH9uC8Sd3DncpxjQLFjiHUVsXYOaybYzr35k2ceEePWBM82CBcxjzNhZTvLuaS4bY2BtjGosFzmG8s2QrbeNacna/qL8ajjGNxgLnEGrrAsxeWch5AzoRHxsT7nKMaTasc+IQFmwqYeeeGht7Y6JPbRV8vRYKV0DhcjjtR9Ch/imQx88C5xBmrywkrmULzuzrz+kSxoRdbTWUbISv17iA2bEadqx09wO1bp2YVnDSeRY4flJVZq/Yzpl9O5LQyjaPaQb2lMC2xbB1MWz9ygVLySbQugPrdOgJnQdCv4nuZ+dBkNwHYhr3b8D+oupZVlDG1rJK7hzfN9ylGNNwqlC+DYo3uJZLyQZ3v3A5lOYeWC8pA7oMhoGXQ2pf75YJrdqEpEwLnHpmrdhOTAvhvAE22M9EmJpKyPsStsyH8q1QsePAbfcOqK08sG5MKxcuXYfA8Buh2zB3v3VS2MoHC5yDzFpRyMj0ZJLa2LlTJsxqKl0fy8a5sGEObJnnhYpAm1Ro0wnadoKUPtCmIySlu/vJfaBDGrSIvCOsFjhBNhRVsH5HBdef5t9kXiZK1VRC0Wq3i1O4Eqp2uUCQFiAx7n71brdbtGuba8HsLT3w+k4nQ9b3ofc50GsMxLUN3+9yAixwgsxasR2A8wfa4XBzgqr3wIaPYPVM11H79doDnbQt492uTaAONOCWa8Atb9cVknpBz1HQvqvbLUo/Hdo1j++kBU6Q2SsKOSWtA90SW4e7FNMUVVXAutmw8m33s2YPtE6GHiOh/0Xu6E+XwZDcOyJ3d0LBAsezvaySxXk7ueuCfuEuxUSaQAD2lkBFIZRvr/dzG5R7P3dthUCN61sZMhlOngS9Tm/0Q8tNmW0Jzwcr3e7UBQPt6FSzEgjAzs2QkALx7Y++fm2162cpyIGCRe5nyYYDg+GCtWrndnXadXGtmA5pcNJ4tzsUpS2Yo7HA8cxaUUjv1Db06dg0O+NMkJpK2PQprHkP1vzbtUQA4hMhsYcb5NahuwuRyl1QVe46cSvL3NiVuiq3fkIqpGVB/wuhbRdo1xnaBt2aaMdtOFngAGV7avhyYzE/OKO3Xca3KQpulWz6FNZ/BDW7oVVbNzQ/40wXKDvzoCzPDYzL/QxiYiGuPcS1g/gOroO2z7kuZLoPhw49wL4PjcoCB/hodSG1AbXdqUij6kLkq3/CnmIXIHFt3ajYVm1gdzEUZMO2pQdaJe26wZCrod9FkHEGtIwL7+9gvsECB3d0qnP7OIakJYa7FANu12bpa5Dzgmu5xLZxu0LVu6G6wv2sq4bYBOg6FE6bAt33tUrSrFUSwaI+cKprA3y6rohvndqdFi3sixo2ZfmweR5snAMr3nSHlLucAhf/BQZ/2+32BKutdh2z1jnbpER94CzfWsae6jrG9kkNdynRIRBwnbiluW4w3JYvYfPn7kgSuD6VQVdA1veg26mHb620tFNPmqKoD5wFm0oAGJGRHOZKmqGaSshfALn/caNtS3OhdPOB/hZwR4J6jYFRP3I/Ow+yVksz5mvgiMgE4FHclTenqurDh1jnKuDXgAJLVPVab/kNwP3ear9V1b/7UeOCTSX06diG1LbWudgoClfAqnfdUaC8BS5cpIU7F6hjf+h7gTvJMDEdkjPcqFvrc4kavgWOiMQATwDjgXxgoYjMCL5kr4hkAvcCY1W1VEQ6ecuTgV8BWbggyvFeW1r/c05EXUBZ
mFvCxXbd8BOXtwA++xOsfR8Q6HoKjLwZ0s+AXqPdYWcT9fxs4YwE1qvqRgARmQZMAoKvEX4z8MS+IFHVHd7yC4APVLXEe+0HwATglcYscPX2XZRX1nKa7U4dXVUFoO7I0L5dHlU3dcJnf3ItmtZJcM59kHUTtEkJZ7UmQvkZON2BvKDH+cBp9dbpCyAin+N2u36tqu8f5rUHTawqIlOAKQA9ex77lBL7+m9GWuAcTNUdkl77PqydBfnZuMYm7qzm2NbQItZN/NS2C5z/OzfRk42+NUcQ7k7jlkAmcDaQBnwqIoMb+mJVfQZ4BiArK0uP9cMXbCohLam1nR0erCwfPvuzC5pdBW5Z9+Fw1i/cwLuaPW4cTM0eqNkLaSNg6LU2wM40iJ+BUwD0CHqc5i0Llg/MV9UaYJOIrMUFUAEuhIJfO7cxi1NVFmwq4ax+dmWG/Va8Be/c4WaVO+k8OPteyDzfnUNkTCPwM3AWApkikoELkMnAtfXWeQu4BnheRFJxu1gbgQ3A/4jIvglYz8d1LjeaDUW7Kd5dbf034Ppn3r/bnULQ7VS4YqqbqtKYRuZb4KhqrYjcBszC9c88p6orROQhIFtVZ3jPnS8iK4E64C5VLQYQkd/gQgvgoX0dyI3lQP9NlHduFiyC6T9wJzSe8TPXqomJDXdVppnytQ9HVWcCM+steyDovgI/9W71X/sc8JxftS3YVEzHdnGkpyT49RGRq67GnVW94g1YMs1NtXDju24qS2N8FO5O47BQVeZvKmFkRnL0TEdRVwub/+POU1o5w81g16odDL0Oxj8Y9suHmOgQlYGTX7qXbWWV0dN/s2U+zPixu+RIbBt3dcVB34I+4yA2PtzVmSgSlYETNeNvqirgo4dgwTNu2oYr/uYm8461YQAmPKI2cDq0jqVvp3ZHX7mpWv8hvPMTN65m5M0w7oGDp3gwJsSiM3BySxiRntz85r+prXanGix+CVa+5a4b/f1Z0LP+AG9jwiPqAmfHrko2fb2ba0c2k6tr1tXCpk9cZ/Cqd6ByJ8R1gDPvgjN+bn00JqJEXeAsyG0m/Td7SmDhVNc/s7vIHXHqfyEMvNxNBG6nGpgIFH2Bs6mEhFYxDOzWgGsURaKdeTDvCVj0d3d5KlwlAAAL50lEQVQ+U+b5cOp33akI1hlsIlxUBs7wXkm0jGkR7lIabk+Jm4Zz5QxYPt1NWDX42zDmx+7yscY0EVEVODv3VLN6ezkXn9I13KUcWVWFO8q0+XM3PecObwqhVm3htB/C6FvdYW5jmpioCpzSPTWckZnK6EidML2q3PXJfPG4Gwkcm+AuGzvoCnfaQbdTbfJw06RFVeBkpLbhxZsi8BBx5S5Y8LTrm9lb6q5PPfYOFzZ2IqVpRqIqcCJKIOCuZLBqhrvgW+VOyLwAzrob0oaHuzpjfGGBE0p1Na5PZvW7sHomlG8FiXFXMjjzLuh+argrNMZXFjihsv5DmHG7m7azZWs4aRz0f8CFTUITHxNkTANZ4PitqgJm3w85z0NqP7j6n+4s7VZROA+PiXoWOH7a/AW89SN3tcnRt8G5v7RTDUxUs8DxQ20VfPwbd3g7sSfc+B6kjw13VcaEnQVOYytaC9O/D9uXwfDvwfm/tWs1GeOxwGksqvDVi/Dvu92F4q6Z5mbWM8bsZ4HTGPbudNdzWvkWZJwFlz8N7SP89AljwsAC51hkPw9fPuVG/7aMg5g497Nojbvk7Xm/hjF3QIsmdGKoMSFkgdNQK96Ed38CXYdC++5QV+U6h6t3Q+pJ7nC3jRA25oh8DRwRmQA8irsQ3lRVfbje8zcCf+TAJYAfV9Wp3nN1wDJv+RZVvdTPWo9oy5fwxi3QYxR89207tG3McfItcEQkBngCGI+7hvhCEZmhqivrrfqqqt52iLfYq6pD/aqvwb5eD69MdtNBXPOKhY0xJ8DPzoaRwHpV3aiq1cA0YJKPn9f4KorgpSvc+U7Xv26nIBhzgvwMnO5AXtDjfG9Z
fVeIyFIReV1EegQtjxeRbBH5UkQu87HOQ6veDa9cDeWFcO2rkNw75CUY09yE+3DKO0C6qp4CfAD8Pei5XqqaBVwLPCIifeq/WESmeKGUXVRU1HhVBepg+s1QsAiu/BukZTXeexsTxfwMnAIguMWSxoHOYQBUtVhVq7yHU4HhQc8VeD83AnOBYfU/QFWfUdUsVc3q2LFj41St6gbvrXkPJv7BXanSGNMo/AychUCmiGSISCtgMjAjeAURCR4ddymwylueJCJx3v1UYCxQv7PZH/Meh4XPupMtT7slJB9pTLTw7SiVqtaKyG3ALNxh8edUdYWIPARkq+oM4HYRuRSoBUqAG72XDwCeFpEALhQfPsTRrca34k03lcTJl8H43/j+ccZEG1HVcNfQKLKysjQ7O/v432DzPPjHJOg2zMbaGHOMRCTH63M9onB3GkeGr9fBtGsgsYeNtTHGRxY4dTXw8tVurM11NtbGGD/ZuVTLp0PJBjedRHJGuKsxplmL7haOKnz+KHQ6GfpOCHc1xjR70R0462a7y+iOud1dr9sY46voDpzPH4X2aTD4ynBXYkxUiN7AyVsImz+H0bfa5XSNCZHoDZzPH4H4RDj1hnBXYkzUiM7AKVoLq9+DkTfbFRWMCaHoDJwvHnNzEY+0c6WMCaXoC5xd22DpqzD0OmjbSGeYG2MaJPoCZ/5TEKiFMYea1dQY46foCpzKMnepl5Mn2Qx+xoRBdAVOeSGk9IGxPwl3JcZEpeg6l6pjX7h5jo0qNiZMoquFAxY2xoRR9AWOMSZsLHCMMSFjgWOMCRkLHGNMyFjgGGNCxgLHGBMyFjjGmJBpNtelEpEiYPNRVksFvg5BOQ0RKbVESh1gtRxKpNQBR66ll6oe9WzoZhM4DSEi2Q25WFcoREotkVIHWC2RXAc0Ti22S2WMCRkLHGNMyERb4DwT7gKCREotkVIHWC2HEil1QCPUElV9OMaY8Iq2Fo4xJoyiInBEZIKIrBGR9SJyT4g/u4eIzBGRlSKyQkTu8JYni8gHIrLO+5kUonpiROQrEXnXe5whIvO9bfOqiLQKUR2JIvK6iKwWkVUiMjqM2+RO799muYi8IiLxodouIvKciOwQkeVByw65HcR5zKtpqYicGoJa/uj9Gy0VkTdFJDHouXu9WtaIyAUN+YxmHzgiEgM8AUwETgauEZGTQ1hCLfAzVT0ZGAX8l/f59wAfqWom8JH3OBTuAFYFPf4D8BdVPQkoBW4KUR2PAu+ran9giFdTyLeJiHQHbgeyVHUQEANMJnTb5QWg/oXtD7cdJgKZ3m0K8FQIavkAGKSqpwBrgXsBvO/wZGCg95onvb+1I1PVZn0DRgOzgh7fC9wbxnreBsYDa4Cu3rKuwJoQfHYa7gt8LvAuILiBXC0Pta18rKMDsAmvDzFoeTi2SXcgD0jGzYD5LnBBKLcLkA4sP9p2AJ4GrjnUen7VUu+5y4GXvPvf+DsCZgGjj/b+zb6Fw4Ev1D753rKQE5F0YBgwH+isqtu8p7YDnUNQwiPAL4CA9zgF2Kmqtd7jUG2bDKAIeN7bvZsqIm0IwzZR1QLgf4EtwDagDMghPNtln8Nth3B/l78P/PtEaomGwIkIItIWmA78RFV3BT+n7r8IXw8XisjFwA5VzfHzcxqoJXAq8JSqDgN2U2/3KRTbBMDrH5mEC8FuQBsO3q0Im1Bth6MRkftw3QMvncj7REPgFAA9gh6nectCRkRicWHzkqq+4S0uFJGu3vNdgR0+lzEWuFREcoFpuN2qR4FEEdk3mX6otk0+kK+q873Hr+MCKNTbBOA8YJOqFqlqDfAGbluFY7vsc7jtEJbvsojcCFwMXOcF4HHXEg2BsxDI9I46tMJ1dM0I1YeLiAB/A1ap6p+DnpoB3ODdvwHXt+MbVb1XVdNUNR23DT5W1euAOcCVoarDq2U7kCci/bxF44CVhHibeLYAo0Qkwfu32ldLyLdLkMNthxnAd72jVaOAsqBdL1+I
yATcbvilqrqnXo2TRSRORDJwHdkLjvqGfnfKRcINuBDXw74BuC/En306rkm8FFjs3S7E9Z98BKwDPgSSQ1jT2cC73v3e3hdlPfAvIC5ENQwFsr3t8haQFK5tAjwIrAaWAy8CcaHaLsAruL6jGlzL76bDbQdcJ/8T3vd4Ge7Imt+1rMf11ez77v41aP37vFrWABMb8hk20tgYEzLRsEtljIkQFjjGmJCxwDHGhIwFjjEmZCxwjDEhY4FjjAkZCxzjOxEZKiIXBj2+tLGmCRGRn4hIQmO8l/GfjcMxvvOGxmep6m0+vHeu994NvpSKiMSoal1j12KOzlo4Zj8RSfcmw3rWm5Bqtoi0Psy6fUTkfRHJEZHPRKS/t/zb3kRWS0TkU+90koeAq0VksYhcLSI3isjj3voviMhTIvKliGwUkbO9iaBWicgLQZ/3lIhke3U96C27HXfC5RwRmeMtu0ZElnk1/CHo9RUi8icRWQKMFpGHxU2KtlRE/tefLWoOEoqh43ZrGjfcXCi1wFDv8WvA9YdZ9yMg07t/Gu7cLHBD7rt79xO9nzcCjwe9dv9j3KRP03DD9icBu4DBuP8Mc4Jq2Te8PwaYC5ziPc4FUr373XDnRnXEnZH+MXCZ95wCV3n3U3DD8SW4Trv5f7MWjqlvk6ou9u7n4ELoG7ypNsYA/xKRxbiJobp6T38OvCAiN+PCoSHeUfeXvwwoVNVlqhoAVgR9/lUisgj4CjfL3KFmbRwBzFV35ve+qRTO9J6rw52xD27Om0rgbyLyLWDPQe9kfNHy6KuYKFMVdL8OONQuVQvcBFVD6z+hqj8UkdOAi4AcERl+DJ8ZqPf5AaCldzbyz4ERqlrq7WrFN+B9g1Wq12+jqrUiMhJ3ZviVwG246TqMz6yFY46ZugnENonIt2H/5N5DvPt9VHW+qj6Am9WvB1AOtDuBj2yPm6SrTEQ64+b23Sf4vRcAZ4lIqje/7jXAJ/XfzGuhdVDVmcCduDmVTQhYC8ccr+uAp0TkfiAW1w+zBPijiGTi+mQ+8pZtAe7xdr9+f6wfpKpLROQr3BQSebjdtn2eAd4Xka2qeo53uH2O9/nvqeqh5rFpB7wtIvHeej891prM8bHD4saYkLFdKmNMyNgulTkiEXkCN8dvsEdV9flw1GOaNtulMsaEjO1SGWNCxgLHGBMyFjjGmJCxwDHGhIwFjjEmZP4/VsITn/5WN/AAAAAASUVORK5CYII=\n",
- "text/plain": [
- ""
- ]
- },
- "metadata": {
- "needs_background": "light"
- },
- "output_type": "display_data"
- }
- ],
- "source": [
- "steps = np.arange(1, MAX_ESTIMATORS, 3)\n",
- "srfc = StreamingRFC(n_estimators_per_chunk=3,\n",
- " max_n_estimators=np.max(steps))\n",
- "\n",
- "%time train_scores, test_scores = inc_partial_fit(x, y, srfc=srfc, steps=steps, sample=0.1)\n",
- "print(f\"With {len(srfc.estimators_)}: {train_scores[-1]} | {test_scores[-1]}\")\n",
- "plot_auc(steps, train_scores, test_scores)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "#### Forest of partial decision trees\n",
- "1 estimator per 10 % subset with all features"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 11,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "CPU times: user 22.3 s, sys: 31.2 ms, total: 22.3 s\n",
- "Wall time: 22.6 s\n",
- "With 119: 0.8138937870631217 | 0.6356310672155322\n"
- ]
- },
- {
- "data": {
- "image/png": "iVBORw0KGgoAAAANSUhEUgAAARwAAAELCAYAAAALJznDAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAIABJREFUeJzt3Xl8VPW9+P/Xm+wLZCVANggQkU1Bw+ZSRYuiUpdaEZdbvW2lt63VVuut/Kxa7e2t/fbaWnut1lqrva1aq62iYgUFxCogAdnXEJAkQAjZSCDrzPv3xzngELaAc2aSzPv5eMwjZ515zyF58zmf81lEVTHGmFDoFe4AjDGRwxKOMSZkLOEYY0LGEo4xJmQs4RhjQsYSjjEmZCzhGGNCxhKOMSZkLOEYY0ImOtwBBEtmZqYOGjQo3GEYE5GWL1++V1X7nui4HpNwBg0aRHFxcbjDMCYiicinnTnObqmMMSFjCccYEzKWcIwxIdNj6nCOpq2tjfLycpqbm8Mdiufi4+PJzc0lJiYm3KEYc0w9OuGUl5fTu3dvBg0ahIiEOxzPqCrV1dWUl5dTUFAQ7nCMOaYefUvV3NxMRkZGj042ACJCRkZGRJTkTPfWoxMO0OOTzUGR8j1N99ajb6mMMSenuc3H6vJ6dtU3Ud/UxqUj+9OvT3zQ3t8Sjsfq6up44YUX+Pa3v31S511++eW88MILpKamehSZiURNrT42VzawtaqR/a0+egk0Nrezq76ZlWV1rNtZT5vvs3HOB2cmW8LpTurq6vjtb397RMJpb28nOvrYl3/OnDleh2Z6OL9f2bB7H0tKa1i3s54NuxrYXNmAz3/kxAmJsVGMyknha+cVMG5gOoMyE0lNjCU1IbhPPS3heOzee+9l69atjBkzhpiYGOLj40lLS2Pjxo1s3ryZq6++mrKyMpqbm7nzzjuZOXMm8FlXjcbGRi677DLOO+88PvroI3Jycnj99ddJSEgI8zczXYnPr+yoOUDJnkbKaw+wYdc+5m/cw97GVgCyesdx+oA+fHF4FiOzUxialUSfhBhUITkumsTYqJDUA0ZMwnnojXWs37kvqO85IrsPD35p5HGPeeSRR1i7di0rV65k4cKFXHHFFaxdu/bQ4+tnn32W9PR0mpqaGDduHNdeey0ZGRmHvceWLVt48cUX+f3vf8/06dN59dVXufnmm4P6XUzXt7+lnW1791NR10S7T9nX3Mb8jXtYU15PVWPLYSWX3nHRXDCsL5OHZXHO0AwGpHSN/6AiJuF0FePHjz+srczjjz/OP/7xDwDKysrYsmXLEQmnoKCAMWPGAHD22Wezffv2kMVrQmPPvmZW7Kilobmd+qY2dtU34/MrvUQoq3VKLtur99NxGrkBKfGcMzSD7JQEBmYkMiQrmfz0RDKSYrvkk8uISTgnKomESlJS0qHlhQsX8u6777J48WISExO58MILj9qWJi4u7tByVFQUTU1NIYnVBJ+qUrp3P8Xba1hZVkd5bRNVDS1s3N1w2HHxMb2IjeqFz69kpyYwrF9vrhmbQ2FWMrlpicRG9yI6ShicmdQlE8uxeJpwRGQq8GsgCnhGVR/psD8feB5IdY+5V1XnuPtmAV8HfMAdqvqOl7F6pXfv3jQ0NBx1X319PWlpaSQmJrJx40aWLFkS4uiM1xpb2lm2vYbVZfWsqahjVXk9VQ0tAPSJj6YgM4n+KfFMO2MA5xf2JT0plj7xMfRJiO5WiaSzPEs4IhIFPAFMAcqBZSIyW1XXBxz2I+BlVX1SREYAc4BB7vIMYCSQDbwrIqepqs+reL2SkZHBueeey6hRo0hISKBfv36H9k2dOpWnnnqK4cOHM2zYMCZOnBjGSE2wtLb7WVlWx9trd/G34nIaW9oRgSF9kzl/aCbjCtIZNyidwZlJ9OrV85LK8XhZwhkPlKhqKYCIvARcBQQmHAX6uMspwE53+SrgJVVtAbaJ
SIn7fos9jNczL7zwwlG3x8XF8fbbbx9138F6mszMTNauXXto+w9+8IOgx2dOTWu7nzUV9Wzbu58texpYV7GP7dX7qdzXTJtPiYkSrhg9gOuK8jgzL5XkuIipwTgmL69ADlAWsF4OTOhwzI+BuSLyXSAJ+GLAuYH3F+XutsOIyExgJkB+fn5QgjamI79fWbGjlpVldbT7lZr9rWypbODjbTXsb3UK3TFRwun9+1A0MI3s1ATOzEtlYkEGKYnWez9QuFPuDcBzqvqoiEwC/k9ERnX2ZFV9GngaoKio6MjWTMacouY2H4u3VjN3fSXvbqg8VO8CEBvdi4KMJK4am8MXCvtyWr/PKnLN8XmZcCqAvID1XHdboK8DUwFUdbGIxAOZnTzXmKCpamhhT0MzZTUHeHP1LuZv3MOBVh9JsVFcOCyLS0b247yhmSTERhEXHUVUhNW9BIuXCWcZUCgiBTjJYgZwY4djdgAXA8+JyHAgHqgCZgMviMgvcSqNC4GPPYzVRKC6A63MW1/JPz6p4KOt1Ye2pyfFcs3YHC4Z2Z+Jg9OJi44KY5Q9i2cJR1XbReR24B2cR97Pquo6EXkYKFbV2cDdwO9F5Ps4Fci3qqoC60TkZZwK5nbgO93xCZXpOnx+ZdPuBt5cvZO56yupO9BK7YE2fH4lPz2R732xkNP79yEjOZYxeanERNntkRc8rcNx29TM6bDtgYDl9cC5xzj3p8BPvYzP9Gzb9+5n7vrd/KukmhWf1tLY0k5UL+GcIRmML0gnMzmOLw7PYnROSo9s89IVhbvSuMc71eEpAB577DFmzpxJYmKiB5H1LK3tfpZ/WsvfisvYunc/jc1tbK3aD8DQrGSuHpvNWflpnFeYSVbv4A23YE6OJRyPHWt4is547LHHuPnmmy3hdLC7vpn3N+9h2fZapzNjbROVDc2oQu/4aMbmp5HVO44Z4/K5/IwB5KR2jY6LxhKO5wKHp5gyZQpZWVm8/PLLtLS0cM011/DQQw+xf/9+pk+fTnl5OT6fj/vvv5/Kykp27tzJ5MmTyczMZMGCBeH+KmFXvL2Gp94vZf7GSvzqVO6e1i+Zc4dmkpOWwNCsZKYM70dCrFXydlWRk3Devhd2rwnue/YfDZc9ctxDAoenmDt3Lq+88goff/wxqsqVV17JokWLqKqqIjs7m7feegtw+lilpKTwy1/+kgULFpCZmRncuLuR6sYW3tu4hzdW7eSDLXvJSIrlPy4YwtVuR0are+leIifhdAFz585l7ty5jB07FoDGxka2bNnC+eefz913380Pf/hDpk2bxvnnnx/mSMOrobmN9zbs4fWVFSzashefXxmQEs+9l53OLZMGWQmmG4uchHOCkkgoqCqzZs3im9/85hH7VqxYwZw5c/jRj37ExRdfzAMPPHCUd+jZdtc389T7W3nx4x20tPsZkBLPbecPZtoZAxiZ3cdKMz1A5CScMAkcnuLSSy/l/vvv56abbiI5OZmKigpiYmJob28nPT2dm2++mdTUVJ555pnDzu3Jt1S1+1uZs3YXb63exZLSakSEL4/NYcb4PMbmpUVcb+qezhKOxwKHp7jsssu48cYbmTRpEgDJycn8+c9/pqSkhHvuuYdevXoRExPDk08+CcDMmTOZOnUq2dnZPa7SeHNlA08u3Mobq3bS7lcG903i2xcOZXpRHvkZ9lSupxLtOGZhN1VUVKTFxcWHbduwYQPDhw8PU0Sh19W/78EOkX9avJ0Fm6pIiIni+nF5XFeUy4gBdsvUnYnIclUtOtFxVsIxnqo/0MZLy3bw4dZqPt5WTXObn4ykWL7/xdP46qSBpCXFhjtEE0KWcIwn2n1+XlpWxqNzN1F7oI3CrGRmjMvngmF9mTQ4g/gYe9IUiXp8wlHViCiqd5Vb44bmNuauq+SJhSWUVu1nfEE6P/7SSEZk9znxyabH69EJJz4+nurqajIyMnp00lFVqquriY8PXx+hA63tPPbuFp77aDut7X6G9E3i
d/92NpeM6Nejr705OT064eTm5lJeXk5VVVW4Q/FcfHw8ubm5YfnsJaXV3PPKKspqmrj2rFxuGJ/HWfn2SNscqUcnnJiYmMMmnTPB4/crn5TV8eqKcl78eAf56Yn8deZEJgzOOPHJJmL16IRjgqt2fyubKhtY/mktLxeX8Wn1AWKjenHzhIHMuvx0EmPt18kcn/2GmBPatnc/Ty8q5W/FZbS781dPKEjnzosL+eKIfvSJt5kJTOdYwjFH5fcrL3y8g2c/3EZp1X5io3pxw/h8pozoR2G/ZAak2Bgz5uRZwjFHWP5pLf/11no+2VHHWfmpPPilEUwd1d+SjPncLOGYQ2r2t/Kj19YwZ81uMpNjefS6M/nyWTn2WNsEjSUcg9+vzF1fyQOvr6XuQBt3TTmNr59XQJJNTWuCzH6jIpjfr7y5Zhf/O38LmysbKcxK5rl/H2+tgo1nLOFEqKWl1Tzw+jo2VTZQmJXMr2eM4YrRA4i2+ZiMhzxNOCIyFfg1zkR4z6jqIx32/wqY7K4mAlmqmuru8wEHByHeoapXehlrpPD5lYfeWMefFn9KbloCj98wlmmjB1irYBMSniUcEYkCngCmAOXAMhGZ7U5+B4Cqfj/g+O8CYwPeoklVx3gVXyRq9/n5wd9W8drKnXzt3ALuuXSYjQ9sQsrLEs54oERVSwFE5CXgKpzpe4/mBuBBD+OJaHsbW7jr5VUs2lzFPZcO4zuTh4Y7JBOBvEw4OUBZwHo5MOFoB4rIQKAAmB+wOV5EinHmFn9EVV/zKtCe7uNtNdz+wgrqm9r42ZdHc8P4/HCHZCJUV6k0ngG8oqq+gG0DVbVCRAYD80VkjapuDTxJRGYCMwHy8+2PqCO/X/nDv7bxyD83MjA9kee/Np7hA+wJlAkfLxNOBZAXsJ7rbjuaGcB3AjeoaoX7s1REFuLU72ztcMzTwNPgjGkclKh7iN31zdzzyio+2LKXS0f243+uO5Pe1ufJhJmXCWcZUCgiBTiJZgZwY8eDROR0IA1YHLAtDTigqi0ikgmcC/w/D2PtMVSV2at2cv9ra2nzKT+9ZhQ3js+31sKmS/As4ahqu4jcDryD81j8WVVdJyIPA8WqOts9dAbwkh4+RuZw4Hci4gd64dThHKuy2bhUlZ++tYFn/rWNs/JTeXT6GAoyk8IdljGH9OhpYiKJz6/85M31PPfRdm6ZNJD7p42wRnwmZGyamAiyu76Z7/31E5aU1vCN8wq474rhdgtluiRLON3cu+srueeVVTS3+fnFV87gK2fnWrIxXZYlnG6q3efnkbc38sy/tjFiQB9+c+NYhvRNDndYxhyXJZxuqP5AG9/8czFLSmv46qSB3HfFcOKirYuC6fos4XQzzW0+bvu/YlbuqOOX08/ky2eFZ2oYY06FJZxuxOdX7v7bKj7eVsPjN4zlyjOzwx2SMSfFnpt2E6rKff9Yw1urd3Hf5cMt2ZhuyRJON/Gztzfy0rIyvnvRUG77wuBwh2PMKbGE0w38ecmnPL2olK9OGshdU04LdzjGnDJLOF3cR1v38uDsdUwe1pcHvzTS2tiYbs0SThdW39TG3S+vYmBGIr+58SyibBhQ083ZU6ou7KHZ69jT0MLfv3UOyTZli+kBrITTRT3/0Xb+/kkF35k8lDPzUsMdjjFBYQmnC/rHJ+U8OHsdl4zoxx0X2djDpuewcnoXUneglZ+8uYFXV5QzcXA6j98w1oaYMD2KJZwu4qOSvXz/5ZVUN7Zy++Sh3H7RUOJjrH+U6Vks4XQBc9bs4jsvrKAgM4k/3DKOUTkp4Q7JGE9YwgmznXVN3Pvqas7ITeXF2yaQGGv/JKbnsgqCMPL7lbteXkm7X/n19WMs2ZgezxJOGM1etZMlpTXcP20Eg2ywcxMBLOGESVOrj5//cyOjc1K4vijvxCcY0wNYwgmTpxeVsqu+mfunjaCXdVkwEcISThhU1DXx5PslXD66P+ML0sMdjjEhYwknDP77rQ0A/H+XDw9z
JMaElqcJR0SmisgmESkRkXuPsv9XIrLSfW0WkbqAfbeIyBb3dYuXcYbSR1v38taaXXzrgqHkpiWGOxxjQsqz57AiEgU8AUwByoFlIjI7cMpeVf1+wPHfBca6y+nAg0ARoMBy99xar+INhdZ2Pw+8vo689AS+eYGN2mcij5clnPFAiaqWqmor8BJw1XGOvwF40V2+FJinqjVukpkHTPUw1pD4/QellOxp5OErR1m3BRORvEw4OUBZwHq5u+0IIjIQKADmn+y53UXJnkZ+M38LU0f2Z/LpWeEOx5iw6CqVxjOAV1TVdzInichMESkWkeKqqiqPQvv86g608o3nl5EcF82PrxwZ7nCMCRsvE04FENiiLdfddjQz+Ox2qtPnqurTqlqkqkV9+/b9nOF6o6Xdx7f+vIKddc387t+K6J8SH+6QjAkbLxPOMqBQRApEJBYnqczueJCInA6kAYsDNr8DXCIiaSKSBlzibutW/H7l7pdXsbi0mp9/ZTRnD0wLd0jGhJVnT6lUtV1EbsdJFFHAs6q6TkQeBopV9WDymQG8pKoacG6NiPwEJ2kBPKyqNV7F6pXH3t3Mm6t3Meuy07lmrE3Ja4wE/J13a0VFRVpcXBzuMA4prWrk0scW8aUzsnl0+pk2vYvp0URkuaoWnei4rlJp3OP811sbiIuO4t7LT7dkY4zLEo4HFm2uYv7GPdxx8VCyelslsTEHWcIJMlXl0XmbyUlN4NZzCsIdjjFdiiWcIFu4qYpVZXV896KhxEbb5TUmkP1FBJGq8qt3N5OXnsC1Z9tTKWM6soQTRAs3VbG6vJ7vTi4kxuaTMuYI9lcRJKrK4/O3kJOawDVndetuX8Z4xhJOkCzeWs0nO+r4jwuHWOnGmGOwv4wgeXz+FrJ6x3Gd1d0Yc0yWcIJg8dZqlpTW8M0Lhtg4N8YchyWcz+ngk6ms3nHcNCE/3OEY06VZwvmcPtiyl4+31fDtC610Y8yJdCrhiMhEEekdsN5HRCZ4F1b3sPzTWr7zlxUMykhkxngr3RhzIp0t4TwJNAasN7rbItan1fv5tz8sJbN3HC/cNtFKN8Z0QmcTjnQYr8aPh2PpdAcvLN1BS7ufP39jAtmpCeEOx5huobMJp1RE7hCRGPd1J1DqZWBdWbvPz98/qWDysCxyLNkY02mdTTj/AZyDM65wOTABmOlVUF3d+5urqGpoYXqRtbkx5mR06rZIVffgDAVqgJeLy8hMjrXpXow5SZ1KOCLyR5wZMA+jql8LekRdXN2BVuZv3MNXJw2yLgzGnKTOVvy+GbAcD1wD7Ax+OF3fnDW7afMp14y1DprGnKzO3lK9GrguIi8C//Ikoi7utZUVDOmbxMjsPuEOxZhu51TvCQqBiKvA2FnXxMfbarh6TI4NjG7MKehsHU4Dn9XhKFAJ/KdXQXVVb6xy7iKvHJMd5kiM6Z46e0vVW0TScUo2B6ch6BkTWp2EuesrOSM3hYEZSeEOxZhuqbN9qb4BvA/8E/hxwM8TnTdVRDaJSImI3HuMY6aLyHoRWSciLwRs94nISvd1xBTBodbQ3MbKsjrOL8wMdyjGdFudfUp1JzAOWKKqk935wP/7eCeISBTwBDAFp7HgMhGZrarrA44pBGYB56pqrYgE1gs1qeqYk/gunvp4Ww0+v3LuEEs4xpyqzlYaN6tqM4CIxKnqRmDYCc4ZD5SoaqmqtgIvAVd1OOY24AlVrYVDDQy7pA9LqomL7sVZA9PCHYox3VZnE065iKQCrwHzROR14NMTnJMDlAW+h7st0GnAaSLyoYgsEZGpAfviRaTY3X51J+P0zEdb91I0KM16hRvzOXS20vgad/HHIrIASMGpxwnG5xcCFwK5wCIRGa2qdcBAVa0QkcHAfBFZo6pbA08WkZm4fbry870bj2ZvYwsbdzfwn1NPVKgzxhzPSbfDUdX3VXW2e5t0PBVAXsB6rrstUDkwW1XbVHUbsBknAaGqFe7PUmAhMPYosTytqkWqWtS3
b9+T/Sqd9mHJXgCrvzHmc/KyM9AyoFBECkQkFqfzZ8enTa/hlG4QkUycW6xSEUkTkbiA7ecC6wmTdzfsISMpllE5KeEKwZgewbNBtFS1XURuB94BooBnVXWdiDwMFKvqbHffJSKyHvAB96hqtYicA/xORPw4SfGRwKdbodTS7mPBxj1MO2MAUb2sdbExn4eno/ap6hxgTodtDwQsK3CX+wo85iNgtJexddbirdU0trRzych+4Q7FmG7Pxlc4gbnrK0mKjeIcq78x5nOzhHMcfr8yb30lFw7LssfhxgRBRA+EfiJrKuqpamhhygi7nTI9TFszNOyCA9WwrwKa90F6AaQOhMR0iEkED0ZEsIRzHB9sqQKw/lOme/L7oGUfNNdDTSns3QK1n8Lu1VC2FHzHadkSkwjJ/eCapyB/YtBCsoRzHIs272VUTh8ykuPCHYqJNL4252dUDLQegH07YV857N8LTbXQ2ghtTaB+aGmA2u1OiWV/NajPOb+pxtkfKCYRMgth/EzoNxIS0qFPNsQlQ3WpU9ppqoHGKmishMSMoH4tSzjH0NDcxoodtdz2hcHhDsX0ZL425w976wKn5NF3GNRXwLI/QEu9kyDaDhznDQRikyFtoJM4skZCVDT0iobETEhIhbg+kDYIMk+D5Kxj3yqle/+7bgnnGJaU1tDuV75Q6F0LZhMh2pqgeitsmgMl7zoJpK3JKUW01H92XHQ8tDcDAiOuhH6jnNuhxAwnmfTJcRJGQpqTZGISPKln8ZIlnGNYtLmKxNgozhqYGu5QTHfSvA+2zIXSBbBzJTTshgN7P9ufOw5S8iA6DpL6OqWQxHTImwD9R0PdDugVBSk9c84zSzhH4fMr8zfuYeLgDOKi7XG4OQ6/36k7KVsCa/8OW+aBrwXiUyFvvJNg+mRDWgEMnHTiRJI2MDRxh4klnKNYtKWKiromZl1+erhDMV2Brw3KlznJZPM7ToVt9hin9LJrlXsbBCT3h6J/h5Ffhtwip6RiDmMJ5yheWLqDzORYLhnRP9yhmFDxu092fK1OvUl9GexaDdsWOa/WBpAoGHgOZA51Ek1SFhR9DTKGQtYIp0RjSea4LOF0sKu+ifkb93Db+YOJjbaG2D1W637Y9DaUvAc7V0B1CfjbjzwuNR9GXwtDLoaCLzhPfcwps4TTwavLy/H5lRvHezegl/FYS6NT+dq7v1MhG6i+HNa8Ah/9xqnMTUiD/Ekw7DLn8XGvaCep9B7gVOIm9+t2T4K6Mks4HXxYUs3I7D7kZySGO5TI1N4K/jaI7TAVT1OdmxB6OXUnFcudBm/tzc7P8mVQud5pqt9U89l5vQc4tzuxiVC1CfZudrYPngzn3+3cItltUMhYwgnQ5vOzsqyO68flnfhgExw125y2KaULYfu/oLnO2Z43wUkKccnObU/pAug73EkQK//yWUXtQSl5kD3WaafSe4DT0G3fTqhcB3vWQV2T0/BtzE0w7HLoe1qov6nBEs5h1u/cR1Obj6JBNjOD5/ZshHkPwJZ3nPWUfBg+zUkU7a1OI7n3H3H29c6Gc+5wEk/xs3DGdBj3DadBXEyC02Cu462T6ZIs4QQo/rQWgKKB9svridpPnbqTknlO35+4PnDRj2DENZAx5PC6kovucxJP2373VioKpjzs3D7F9wnbVzCfjyWcAMXba8hNS6B/SvyJDzbHtv1Dp06lV5SzvGOxUwo5sBekFxRe4pRQzrwBko7TEz861nkdJGLJppuzhONSVYo/reXcIcHtHRtR/H54/+fO6+DU8yl5Tr8gVee2Z/w3IaXj9GQmUljCce2oOUBVQwtFg+x26pT4ffDat2D1X+HMG+HSnzrbE9LssbI5xBKOa7lbf3O2TeV78tpb4I3vOcnmoh/B+T+wJGOOyhKO65MddSTFRnFav97hDqXr8vth7Suw4k/QuMd5QjToPNj4FtRug8n3wRfuCXeUpguzhOP6pKyWM/NSbe6pY6krg5e/6nQDyBzmDBS1
fy8sedLpS3TzqzD0i+GO0nRxniYcEZkK/BpnIrxnVPWRoxwzHfgxTi3jKlW90d1+C/Aj97D/UtXnvYqzqdXHhl0NfOuCIV59RPdWvhxeutEZNOqap2H0dU6LX3C2RcV9tm7McXiWcEQkCngCmIIzh/gyEZkdOIOmiBQCs4BzVbVWRLLc7enAg0ARTiJa7p5b60Wsayrq8fmVsfkR1jGvpdG5LTrYtN/XDuv+4Qy8nZjhjG9b8i5snQ99cuHr70DW8MPfIyYh9HGbbsvLEs54oERVSwFE5CXgKg6fI/w24ImDiURV97jbLwXmqWqNe+48YCrwoheBfrLDyWNj8iIg4ajC8ufgk/9z+iNFxTmDavcbCTs/+ayv0UF9cuDiB5xhGBKsQt18Pl4mnBygLGC9HJjQ4ZjTAETkQ5zbrh+r6j+Pca5njTdW7KhlYEZiz56dQdVp3fvOfbDpLRhwJlzwQ2d83T0bYdsHEJ8C1/8Fcs6G/VXO6HTWZcAEUbgrjaOBQuBCIBdYJCKdnlNcRGYCMwHy809tOAlV5ZMddZzTExr8+dqdFr47FjvzELW3OJ0Z63ZAxQpnmpFe0XDpz2Dit47/6LrPgNDFbSKGlwmnAgjsdp3rbgtUDixV1TZgm4hsxklAFThJKPDchR0/QFWfBp4GKCoq0lMJsqymiT0NLV2z/U1TLax/HfZscCYjO1AD61+D3PFw/l3OMA0l7zl9k6pLYN8uaG9yzk3Kgph4Z2aA3v0gbxwM/B4MvTgk04EYczReJpxlQKGIFOAkkBnAjR2OeQ24AfijiGTi3GKVAluB/xaRg1ngEpzK5aBbsq0agAmDu0gJp2ozrH7JGa5h50pnUrOoWFj6lLM/bZAz5OVHv3EG6wZIHwIDxsBpl0H+BHdkui6YQE3E8yzhqGq7iNwOvINTP/Osqq4TkYeBYlWd7e67RETWAz7gHlWtBhCRn+AkLYCHD1YgB9vS0hrSk2IpzEr24u2PtOFN2Pa+c1sT1eHyz3sQPnzMGTs3bzyc9z0Y/iXoN9pp/xIV69S9lC2FVS868xYNucjpaW1MNyCqp3Qn0uUUFRVpcXHxSZ933s/nMyo7haf+7WwPogqgCv/6Jbz3sLN+2S9gwszP9pcuhD/rYnLxAAAOuklEQVRdBWfMgCkPOcNjGtNNiMhyVS060XER3Vqroq6J8tomJgz2+ElMQyX85Ton2Yy+DgadDwv+y5kH2u93Gta99h3IKIQvPWbJxvRY4X5KFVZLS936mwIP62+2fwh/u8UZOOqyX8D426BqIzx5Lvx2gvMkqWUfRCfArW9ZQzrTo0V0wvl4Ww194qMZ1t+jDpvLn4e37nIqem95E7LcifWyhsMV/+O0fUlMd546FU6xNi+mx4vohLOyrI6zBqYFv8OmKnzwKMz/idOh8SvPOo3qAhV9zXkZE0Eitg6nuc3Hlj2NjM5JOfHBJ2vBT51kc8b1cMNLRyYbYyJUxJZwNuzah8+vjMwOcjL44FFY9As466sw7dfWi9qYABH717B25z4ARucGMeGUvu8+iZoO0x6zZGNMBxH7F7Guop60xBiygzVDQ3srvHW3U0F85eM2m6MxRxGxt1Rrd9YzKicFCdbYu4t/A9Vb4KZX7NG2MccQkSWc1nY/m3Y3BK/+pqESFj0Kp09zHm8bY44qIhPO5soG2nzKqJwgTaq28GdOR8opDwfn/YzpoSLylmrdznoARn3eEk7FCmfoiBV/cmaStE6UxhxXRCac7dUHiIkS8tMTT/1NNrwJf73JWU7qCxf8Z3CCM6YHi8iEU1HbxICUBHqdagvjgy2J0wrg+j9Dar7NeW1MJ0RmwqlrIif1czxJ2v6BMz7NtF9B/1HBC8yYHi4iK43Law+Qm3aKCedg6SapL5x5Q3ADM6aHi7iE09ruZ09DCzmnknBUYd79zmBZ591l7W2MOUkRl3B21TehyqndUi1+whlLeNxtzqwHxpiTEnEJp7zWmdXgpEs4zfvg/Z9D
4aVw2f87/hQrxpijiriEU+EmnLy0k3wkvvyPzsh8k2dZp0xjTlHE/eWU1zXRS6D/yXTabG+Bxb+Fggsge6x3wRnTw0VcwqmobaJfn3hiojr51X3t8M9Z0LjbmbbFGHPKIq4dTnntgc5XGNeXwxt3Qsm7MOl2GDzZ2+CM6eEiLuFU1DVR1JlpfZf+DuY94DwKn/YrG3/YmCDw9JZKRKaKyCYRKRGRe4+y/1YRqRKRle7rGwH7fAHbZwcjHp9f2V3ffOInVA274Z37nPm8v1tsycaYIPGshCMiUcATwBSgHFgmIrNVdX2HQ/+qqrcf5S2aVHVMMGOq3NdMu1/JST3BE6riP4K/Ha74pdNPyhgTFF6WcMYDJapaqqqtwEvAVR5+3gkpcO1ZuYzMPk5Hy/ZWKH7WGUjLhpswJqi8TDg5QFnAerm7raNrRWS1iLwiInkB2+NFpFhElojI1UEJKDWBR6efyZl5qcc+aP1rsH8PTPhmMD7SGBMg3I/F3wAGqeoZwDzg+YB9A93J0W8EHhORI4obIjLTTUrFVVVVwYlo45vQJwcGXxSc9zPGHOJlwqkAAkssue62Q1S1WlVb3NVngLMD9lW4P0uBhcARLe5U9WlVLVLVor59+37+iFVhx1IYeI61JjbGA17+VS0DCkWkQERigRnAYU+bRGRAwOqVwAZ3e5qIxLnLmcC5QMfK5uCr3e408Muf6PlHGROJPHtKpartInI78A4QBTyrqutE5GGgWFVnA3eIyJVAO1AD3OqePhz4nYj4cZLiI0d5uhV8ZUudn3mWcIzxgqcN/1R1DjCnw7YHApZnAbOOct5HwGgvYzvE74M3vw/DvwQ7lkBcCmQND8lHGxNpIq6l8RG2zIUVz8P61yGuD+SNs1kzjfGI1YwufcoZLrS9Gep3WP2NMR6K7ISzZ6MzXOjEb8FF9zvbBn0hrCEZ05NF9i3Vst9DVBycdSskpsOQi6DfiHBHZUyPFbkJRxU2vQ2nXQpJGc42SzbGeCpyb6mqt8K+Chh8YbgjMSZiRG7C2fa+83PwheGMwpiIEtkJp08upA8OdyTGRIzITDh+P2z7AAZfYNO9GBNCkZlwKtdAUw0U2CNwY0IpMhPO9g+dn5ZwjAmpyEw4u9dAcj/okx3uSIyJKJGZcCrXQr9R4Y7CmIgTeQnH1wZVG6HfyHBHYkzEibyEU10CvlboH5rRL4wxn4m8hLN7rfPTSjjGhFzkJZzKtdArBjJPC3ckxkScyEw4fU+HqJhwR2JMxInAhLPObqeMCZPISjj7q6FhF/S3R+LGhENkJZzmOhh4LmQfMcWVMSYEImsArowh8O9zTnycMcYTkVXCMcaElSUcY0zIeJpwRGSqiGwSkRIRufco+28VkSoRWem+vhGw7xYR2eK+bvEyTmNMaHhWhyMiUcATwBSgHFgmIrOPMmXvX1X19g7npgMPAkWAAsvdc2u9itcY4z0vSzjjgRJVLVXVVuAl4KpOnnspME9Va9wkMw+Y6lGcxpgQ8TLh5ABlAevl7raOrhWR1SLyiojkneS5xphuJNyVxm8Ag1T1DJxSzPMnc7KIzBSRYhEprqqq8iRAY0zweJlwKoC8gPVcd9shqlqtqi3u6jPA2Z091z3/aVUtUtWivn37Bi1wY4w3RFW9eWORaGAzcDFOslgG3Kiq6wKOGaCqu9zla4AfqupEt9J4OXCWe+gK4GxVrTnO51UBn54grExg7yl+pWDqKnFA14nF4jhSV4mlM3EMVNUT/q/v2VMqVW0XkduBd4Ao4FlVXSciDwPFqjobuENErgTagRrgVvfcGhH5CU6SAnj4eMnGPeeEX1ZEilW16JS/VJB0lTig68RicRypq8QSzDg87dqgqnOAOR22PRCwPAuYdYxznwWe9TI+Y0xohbvS2BgTQSIt4Twd7gBcXSUO6DqxWBxH6iqxBC0OzyqNjTGmo0gr4RhjwigiEs6JOpF6/Nl5IrJARNaLyDoRudPdni4i89zOqfNE
JC1E8USJyCci8qa7XiAiS91r81cRiQ1BDKluy/KNIrJBRCaF8Xp83/13WSsiL4pIfKiuiYg8KyJ7RGRtwLajXgdxPO7GtFpEzjr2Owcljl+4/z6rReQfIpIasG+WG8cmEbn0ZD6rxyecgE6klwEjgBtEZEQIQ2gH7lbVEcBE4Dvu598LvKeqhcB77noo3AlsCFj/OfArVR0K1AJfD0EMvwb+qaqnA2e68YT8eohIDnAHUKSqo3Cab8wgdNfkOY7sI3is63AZUOi+ZgJPehzHPGCU2wtgM+7TZPd3dwYw0j3nt+7fWOeoao9+AZOAdwLWZwGzwhjP6zg96DcBA9xtA4BNIfjsXJxf4ouANwHBadAVfbRr5VEMKcA23PrDgO3huB4H++yl4zQReROn43DIrgkwCFh7ousA/A644WjHeRFHh33XAH9xlw/7+8FpZzeps5/T40s4dKGOoCIyCBgLLAX6qdvKGtgN9AtBCI8B/wn43fUMoE5V2931UFybAqAK+KN7a/eMiCQRhuuhqhXA/wA7gF1APU4L91Bfk0DHug7h/D3+GvB2MOKIhITTJYhIMvAq8D1V3Re4T53/Kjx9XCgi04A9qrrcy8/phGicLitPqupYYD8dbp9CcT0A3PqRq3CSYDaQRBcaBiVU1+F4ROQ+nGqBvwTj/SIh4XSqI6iXRCQGJ9n8RVX/7m6uFJEB7v4BwB6PwzgXuFJEtuOMTXQRTl1KqtvvDUJzbcqBclVd6q6/gpOAQn09AL4IbFPVKlVtA/6Oc51CfU0CHes6hPz3WERuBaYBN7nJ73PHEQkJZxlQ6D55iMWp8Jodqg8XEQH+AGxQ1V8G7JoNHBw69Racuh3PqOosVc1V1UE412C+qt4ELAC+EsI4dgNlIjLM3XQxsJ4QXw/XDmCiiCS6/04HYwnpNengWNdhNvBV92nVRKA+4NYr6ERkKs7t95WqeqBDfDNEJE5ECnAqsT/u9Bt7XTHXFV7A5Tg17VuB+0L82efhFItXAyvd1+U49SfvAVuAd4H0EMZ0IfCmuzzY/YUpAf4GxIXg88cAxe41eQ1IC9f1AB4CNgJrgf8D4kJ1TYAXceqO2nBKfl8/1nXAqeB/wv0dXoPzZM3LOEpw6moO/s4+FXD8fW4cm4DLTuazrKWxMSZkIuGWyhjTRVjCMcaEjCUcY0zIWMIxxoSMJRxjTMhYwjHGhIwlHOM5ERkjIpcHrF8ZrGFCROR7IpIYjPcy3rN2OMZzbhP5Iu0wh3yQ3nu7+96dnk5FRKJU1RfsWMyJWQnHHCIig9wBsX7vDko1V0QSjnHsEBH5p4gsF5EPROR0d/t17mBWq0Rkkdud5GHgehFZKSLXi8itIvK/7vHPiciTIrJEREpF5EJ3QKgNIvJcwOc9Kc4sq+tE5CF32x04nS4XiMgCd9sNIrLGjeHnAec3isijIrIKmCQij4gzKNpqEfkfb66oOUIomo/bq3u8cMZEaQfGuOsvAzcf49j3gEJ3eQJO3yxwmt3nuMup7s9bgf8NOPfQOs7gTy/hNN2/CtgHjMb5z3B5QCwHm/hHAQuBM9z17UCmu5yN0z+qL06v9PnA1e4+Baa7yxk4zfIlME57ef+yEo7paJuqrnSXl+MkocO4Q22cA/xNRFbiDA41wN39IfCciNyGkxw64w11/vLXAJWqukZV/cC6gM+fLiIrgE9wRps72qiN44CF6vT+PjikwhfcfT6cHvvgjHvTDPxBRL4MHDjinYwnPJ0Iz3RLLQHLPuBot1S9cAapGtNxh6r+h4hMAK4AlovI2UecfezP9Hf4fD8Q7fZK/gEwTlVr3Vut+E68b6Bmdett1JkVdjxO7/CvALfjDNdhPGYlHHPS1BlAbJuIXAeHBvg+010eoqpL1ZlhtQpn7JQGoPfn+Mg+OAN11YtIP5zxfQ8KfO+PgQtEJNMdZ/cG4P2Ob+aW0FLUmRn2+zjjKpsQsBKOOVU3AU+KyI+AGJx6mFXAL0SkEKdO5j13
2w7gXvf262cn+0GqukpEPsEZRqIM57btoKeBf4rITlWd7D5uX+B+/luqerSxbHoDr4tIvHvcXScbkzk19ljcGBMydktljAkZu6UyxyUiT+CM8xvo16r6x3DEY7o3u6UyxoSM3VIZY0LGEo4xJmQs4RhjQsYSjjEmZCzhGGNC5v8HQjjS6NRTHBYAAAAASUVORK5CYII=\n",
- "text/plain": [
- ""
- ]
- },
- "metadata": {
- "needs_background": "light"
- },
- "output_type": "display_data"
- }
- ],
- "source": [
- "steps = np.arange(1, MAX_ESTIMATORS, 1)\n",
- "srfc = StreamingRFC(n_estimators_per_chunk=1,\n",
- " max_n_estimators=np.max(steps),\n",
- " max_features=x.shape[1])\n",
- "\n",
- "%time train_scores, test_scores = inc_partial_fit(x, y, srfc=srfc, steps=steps, sample=0.1)\n",
- "print(f\"With {len(srfc.estimators_)}: {train_scores[-1]} | {test_scores[-1]}\")\n",
- "plot_auc(steps, train_scores, test_scores)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Optimised parameters\n",
- "\n",
- "Using a better set of parameters for this dataset"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 12,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Fitting 3 folds for each of 100 candidates, totalling 300 fits\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.\n",
- "[Parallel(n_jobs=-1)]: Done 2 tasks | elapsed: 5.1s\n",
- "[Parallel(n_jobs=-1)]: Done 9 tasks | elapsed: 5.4s\n",
- "[Parallel(n_jobs=-1)]: Done 16 tasks | elapsed: 6.1s\n",
- "[Parallel(n_jobs=-1)]: Done 25 tasks | elapsed: 6.6s\n",
- "[Parallel(n_jobs=-1)]: Done 34 tasks | elapsed: 7.8s\n",
- "[Parallel(n_jobs=-1)]: Done 45 tasks | elapsed: 9.1s\n",
- "[Parallel(n_jobs=-1)]: Done 56 tasks | elapsed: 9.9s\n",
- "[Parallel(n_jobs=-1)]: Done 69 tasks | elapsed: 11.9s\n",
- "[Parallel(n_jobs=-1)]: Done 82 tasks | elapsed: 12.8s\n",
- "[Parallel(n_jobs=-1)]: Done 97 tasks | elapsed: 13.7s\n",
- "[Parallel(n_jobs=-1)]: Done 112 tasks | elapsed: 14.9s\n",
- "[Parallel(n_jobs=-1)]: Done 129 tasks | elapsed: 16.1s\n",
- "[Parallel(n_jobs=-1)]: Done 146 tasks | elapsed: 17.2s\n",
- "[Parallel(n_jobs=-1)]: Done 165 tasks | elapsed: 18.5s\n",
- "[Parallel(n_jobs=-1)]: Done 184 tasks | elapsed: 20.4s\n",
- "[Parallel(n_jobs=-1)]: Done 205 tasks | elapsed: 22.0s\n",
- "[Parallel(n_jobs=-1)]: Done 226 tasks | elapsed: 23.2s\n",
- "[Parallel(n_jobs=-1)]: Done 249 tasks | elapsed: 25.5s\n",
- "[Parallel(n_jobs=-1)]: Done 272 tasks | elapsed: 28.5s\n",
- "[Parallel(n_jobs=-1)]: Done 300 out of 300 | elapsed: 30.6s finished\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "{'bootstrap': True, 'class_weight': None, 'criterion': 'gini', 'max_depth': None, 'max_features': 'auto', 'max_leaf_nodes': None, 'min_impurity_decrease': 0, 'min_impurity_split': None, 'min_samples_leaf': 60, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'n_estimators': 30, 'n_jobs': -1, 'oob_score': False, 'random_state': None, 'verbose': 0, 'warm_start': False}\n",
- "n_ests: 119\n",
- "Train AUC: 0.8107342572755221\n",
- "Test AUC: 0.6393736367965102\n"
- ]
- }
- ],
- "source": [
- "from sklearn.model_selection import RandomizedSearchCV as RCV\n",
- "\n",
- "grid = RCV(RandomForestClassifier(n_estimators=30, \n",
- " n_jobs=-1),\n",
- " param_distributions={'min_samples_leaf': [1, 2, 10, 30, 60, 120, 240, 480],\n",
- " 'min_samples_split': [2, 10, 30, 60, 120, 240, 480],\n",
- " 'min_impurity_decrease': [0, 0.05, 0.1, 0.2, 0.3]},\n",
- " cv=3,\n",
- " n_iter=100,\n",
- " verbose=10,\n",
- " n_jobs=-1)\n",
- "\n",
- "x_train, x_test, y_train, y_test = train_test_split(x, y, \n",
- " test_size=0.25,\n",
- " random_state=1)\n",
- "\n",
- "grid.fit(x_train, y_train)\n",
- "print(grid.best_estimator_.get_params(deep=True))\n",
- "\n",
- "tr_score, te_score = score(grid,\n",
- " train=(x_train, y_train),\n",
- " test=(x_test, y_test),\n",
- " pr=True)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 13,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "{'bootstrap': True,\n",
- " 'class_weight': None,\n",
- " 'criterion': 'gini',\n",
- " 'max_depth': None,\n",
- " 'max_features': 'auto',\n",
- " 'max_leaf_nodes': None,\n",
- " 'min_impurity_decrease': 0,\n",
- " 'min_impurity_split': None,\n",
- " 'min_samples_leaf': 60,\n",
- " 'min_samples_split': 2,\n",
- " 'min_weight_fraction_leaf': 0.0,\n",
- " 'oob_score': False,\n",
- " 'random_state': None,\n",
- " 'verbose': 0}"
- ]
- },
- "execution_count": 13,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "params = grid.best_estimator_.get_params()\n",
- "params.pop('warm_start', None)\n",
- "params.pop('n_jobs', None)\n",
- "params.pop('n_estimators', None)\n",
- "params"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Standard random forest"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 14,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "CPU times: user 5.44 s, sys: 46.9 ms, total: 5.48 s\n",
- "Wall time: 5.55 s\n",
- "With 111: 0.8330460502886542 | 0.649373182395347\n"
- ]
- },
- {
- "data": {
- "image/png": "iVBORw0KGgoAAAANSUhEUgAAARwAAAELCAYAAAALJznDAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAIABJREFUeJzt3Xt4VfWd7/H3N7e9cyfkAkIIQQmISgc0oFan1WkV1I7antaC5Yx2amnPqR1r59jiM62t9o/j3DqdPmO12lJ7epE6trXUMhZb8TLVCgFRAUnCnYAkIRDYuWfv/T1/rBXYhITsJHvtlWR/X8+znr3W2mvt/c0i+fBbt98SVcUYY5Ihze8CjDGpwwLHGJM0FjjGmKSxwDHGJI0FjjEmaSxwjDFJY4FjjEkaCxxjTNJY4BhjkibD7wISpaSkRCsrK/0uw5iUtHnz5qOqWjrUchMmcCorK6mpqfG7DGNSkojsj2c526UyxiSNBY4xJmkscIwxSWOBY4xJGgscY0zSWOAYY5LGAscYkzQT5jocY8zgVJXucNQdInT3Oq9dvWfP6w5H6e6N0hWOcP1FU5laGExYHRY4xowBqkpXb5T2njCdPRHae8J09ETo6I7Q0ROmszdCuzve0RNxB2f8jOV7wnT1Runq7QuOyKmgGYnZpXkWOMaMFb2RKO3dYUJdYdq6Y4au06+hU9O9tMUuG/vaE2Y4zzPISk8jJ5BOTmY62Vnp5AYyyM5Mpyw/SDAzjWBGOoHMNAKxrxlpBDLSCGa64+7rqekMZ7lg5un3CrMzE7q9LHBMylFVOnsjp8Kg/axgODs4Qm5gtHdHzpju6h265SACeVkZ5AUzyAtkkBvIID+YwdSCIHmBM+fnZKWTk+W8Zmelk5vVN8+Zn+2OZ6aPz8OvFjhmzOsOR2gOdXO8vfesXY7OfrsXzrwBdjt63eluZ/ckGkdrIiNNyA/2BUImeYF0SvKyqCzJJc8NjdysjFPL5MeER37fOsEMcjLTSUsT7zfUOGCBY3zT2ROhKdRFU6ibppPdNJ50x0NdNPfNC3XR2tE75GeJQHbm6dZBXwshJyud4rzA6XmZzvt5QbelEcg4o5WR787PC2QQyEhDxIIikSxwTMJFokpzqJtDrR0cau2i8UTXmcES6qL5ZDeh7vBZ62amC6V5AUoLgswszmHRrCLK8oOU5QcozguQm3XmMYu+XY1gpoXDeGCBY4atsyfC4ROdHDreyeHWTg71Dcc7OXyik/dauwj322cJZKRRVhBgSn6QC6fm84GqUkrzA5TlB5hSEKSsIEBZfpBJ2Zm2+zGBWeCYs5zo7OVAS8epFkpssBxu7aSlveeM5dMEphYEmV6UzaUVRUx/XzbTJmUzfZLzOrUwSEEww1ogxgInVZ3s6mXf0Xb2Hm1nf0uHM97ijB/rFyg5WemnwmN+eSHTY8Jk2qQgUwuCZIzTsyYmuSxwJrBQVy/7jnawr6X9VKDscwOmfytlWmGQypJcllw8lVklOcwszmX6pGzKi7IpzM601olJCAucCeJEZy+/feswbx5oZX9LO/ta2jnadmaoTC0IUlmSw/UXT6GyOJfKklwqi3OZWZxDMDPdp8pNKrHAGcdUlc37j/PzjQdY9857dPVGKcsPMKsklw9dOIXKklxmleRQWZLLzMm5ZGdZqBh/WeCMQ8fbe/jllgbWbDrIrqY28gIZfOzScpYvqmB+eaHf5RkzKAuccUJVeX1PC2s2HuT5bUfoiURZWDGJf/of7+Om951HbsD+Kc3YZ7+lY1xzqJtnNjfwi00H2NfSQUEwg9svr2DZ4hlcOLXA7/KMGRYLnDEoGlVe3XWUp944wB/ebSQcVRZXTuaeD1dxwyXn2QFeM25Z4IwhR0508XTNQX6x6SCHWjspysnk01dV8slFFcwuy/O7PGNGzQLHZ6rKhtomfv7GAV7c2URU4arZxay64UKuv3gKgQxr
zZiJwwLHR6rKPz5fy2Mv76YkL8DnP3gBn1w0g5nFuX6XZownLHB89MiGXTz28m4+dXkF37z54nHbqZIx8bLA8cmP/rSXf1lfx8cWTudbt1xid0iblODpf6kislREakVkl4isGuD9ChHZICJvisjbInJjzHv3u+vVisgSL+tMtqc3HeTB3+5gycVT+KePv8/CxqQMz1o4IpIOPAJcBzQAm0RkraruiFnsa8DTqvqoiFwErAMq3fFlwMXANOAPIjJHVSNe1Zssz719mFW/epsPzCnlu8sX2l3WJqV4+du+GNilqntUtQdYA9zSbxkF+q5eKwQOu+O3AGtUtVtV9wK73M8b117c2ciX1myleuZkvr/iMjsDZVKOl4EzHTgYM93gzov1TWCFiDTgtG6+OIx1x5XXdh/l8z/dwkXTCvjhndV2I6VJSX6355cDT6pqOXAj8BMRibsmEVkpIjUiUtPc3OxZkaO15cBx7vpxDZXFOfz404vJDyb2WT/GjBdeBs4hYEbMdLk7L9ZngKcBVPV1IAiUxLkuqvq4qlaranVpaWkCS0+cHYdPcufqjZTlB/jpZy6nKDfL75KM8Y2XgbMJqBKRWSKShXMQeG2/ZQ4AHwIQkXk4gdPsLrdMRAIiMguoAjZ6WKsndje38T9/+Aa5gQx+etfllBUk7pGpxoxHnp2lUtWwiNwN/B5IB1ar6nYReQioUdW1wN8DT4jIvTgHkO9UVQW2i8jTwA4gDHxhvJ2hOnisgxU/eAMR+Nldl1NelON3Scb4TnQ4DzQew6qrq7WmpsbvMgBoPNnFJx57nROdvaxZeQXzzrNuJMzEJiKbVbV6qOX8Pmg84Rxr72HFD96gpa2bJz+9yMLGmBh2a0MCnezq5Y7VGzlwrIMnP72YhRVFfpdkzJhiLZwE6egJ85knN7HzyEkeW3EZV15Q7HdJxow5FjgJ0B2O8LmfbGbz/uN855MLufbCMr9LMmZMsl2qUQpHonzx52/yav1R/vnjTofmxpiBWQtnFKJR5b5n3mb9jkYevPliPlE9Y+iVjElhFjgjpKp8/Tfb+PWbh7hvyVzueH+l3yUZM+ZZ4IzQ917azc/eOMD/uuYCvnDtbL/LMWZcsMAZoefefo/FsybzlSVz/S7FmHHDAmcEwpEou5vbWDBjEiLWW58x8bLAGYH9xzroCUeZMyXf71KMGVcscEagvjEEwJwp9nA6Y4bDAmcEao+0IYI9DdOYYbLAGYG6phAzinLIybLrJo0ZDgucEag7ErLjN8aMgAXOMPWEo+w92m7Hb4wZAQucYdp7tJ1wVJk71Vo4xgyXBc4w1blnqKrKLHCMGS4LnGGqawyRniacX5rrdynGjDsWOMNU1xhiZnEOwUx7kJ0xw2WBM0x1jW3MtTNUxoyIBc4wdPVG2N/STpUFjjEjYoEzDLua2ogq1sIxZoQscIahvsnuoTJmNCxwhqH2SBuZ6UJliZ2hMmYkLHCGob4xxPkleWSm22YzZiTsL2cYahtDzLErjI0ZMQucOLV3h2k43skc65LCmBHzNHBEZKmI1IrILhFZNcD7/yYiW92hTkRaY96LxLy31ss641Hf1AZgLRxjRsGzDl1EJB14BLgOaAA2ichaVd3Rt4yq3huz/BeBhTEf0amqC7yqb7jqTvXyZ4FjzEh52cJZDOxS1T2q2gOsAW45x/LLgac8rGdU6o6ECGSkUTE5x+9SjBm3vAyc6cDBmOkGd95ZRGQmMAt4MWZ2UERqROTPInKrd2XGp66pjdlleaSn2VMajBmpsdJH5jLgGVWNxMybqaqHROR84EUReUdVd8euJCIrgZUAFRUVnhZYdyTE+y8o9vQ7jJnovGzhHAJiH7Zd7s4byDL67U6p6iH3dQ/wEmce3+lb5nFVrVbV6tLS0kTUPKATnb0cOdll91AZM0peBs4moEpEZolIFk6onHW2SUQuBIqA12PmFYlIwB0vAa4CdvRfN1n6Hgszd6qdEjdmNDzbpVLVsIjc
DfweSAdWq+p2EXkIqFHVvvBZBqxRVY1ZfR7wfRGJ4oTiw7Fnt5KtrtE5JW69/BkzOp4ew1HVdcC6fvMe6Df9zQHWew2Y72Vtw1HXGCInK53pk7L9LsWYcc2uNI5DXWOIqin5pNkZKmNGxQInDnWNIbulwZgEsMAZQktbN0fbeuyxMMYkgAXOEE4dMLZT4saMmgXOEPp6+bNuRY0ZPQucIdQeCZEfzGBKQcDvUowZ9yxwhlDvPhZGxM5QGTNaFjjnoKrUuqfEjTGjZ4FzDs2hbk509jLXntJgTEJY4JxDrXW6ZUxCWeCcQ98pcetW1JjEsMA5h7ojISbnZlGSZ2eojEkEC5xzqGsK2VM2jUkgC5xBqCr1jW12/MaYBLLAGcThE120dYctcIxJIAucQdQdsTNUxiSaBc4gTj+Hyo7hGJMoFjiDqG0MUZYfYFJOlt+lGDNhWOAMor6xzfrAMSbBLHAGEI0q9U0h6zTdmASzwBnAweMddPVG7bEwxiSYBc4Aat0zVHaXuDGJZYEzgPqmvudQWQvHmESywBlA7ZEQ0ydlkx/M9LsUYyYUC5wB1DXaPVTGeCGuwBGRK0QkP2a6QEQu964s/4QjUfY0t9sVxsZ4IN4WzqNAW8x0mztvwtnX0kFPJGqBY4wH4g0cUVXtm1DVKB4/l9wvddbLnzGeiTdw9ojI34lIpjvcA+wZaiURWSoitSKyS0RWDfD+v4nIVneoE5HWmPfuEJF6d7gj/h9pdOoaQ4jAbDtDZUzCxdtK+TzwXeBrgAJ/BFaeawURSQceAa4DGoBNIrJWVXf0LaOq98Ys/0VgoTs+GfgGUO1+32Z33eNx1jtidY0hKibnkJ2V7vVXGZNy4gocVW0Clg3zsxcDu1R1D4CIrAFuAXYMsvxynJABWAK8oKrH3HVfAJYCTw2zhmGrs063jPFMXIEjIj/CaWmcQVX/9hyrTQcOxkw3AAOe2RKRmcAs4MVzrDt9gPVW4ra0KioqzlFKfLrDEfYebWfJxVNG/VnGmLPFu0v1XMx4EPgocDiBdSwDnlHVyHBWUtXHgccBqqurzwrE4dp7tJ1IVK2FY4xH4t2l+mXstIg8Bfz3EKsdAmbETJe78wayDPhCv3Wv6bfuS3GUOiq11sufMZ4a6ZXGVUDZEMtsAqpEZJaIZOGEytr+C4nIhUAR8HrM7N8D14tIkYgUAde78zxV39hGeppwfmmu119lTEqK9xhOiNPHcBRoBL5yrnVUNSwid+MERTqwWlW3i8hDQI2q9oXPMmBNv+t8jonIt3BCC+ChvgPIXqptDFFZnEMgw85QGeOFeHep8t1T1VU4x3BggIPIA6y3DljXb94D/aa/Oci6q4HV8dSXKPWNIS6aVpDMrzQmpcTbwrkLuAfnWMpW4AqcXaC/8q605OrsibD/WAe3LDjrZJgxJkHiPYZzD7AI2K+q1+JcoNd67lXGl93Nbahi/Rgb46F4A6dLVbsARCSgqjuBud6VlXynz1DZLQ3GeCXe63AaRGQS8CzwgogcB/Z7V1by1TWFyEpPY2axnaEyxivxHjT+qDv6TRHZABQCz3tWlQ/qjoQ4vzSXzHTrk8wYrwy7iwlVfdmLQvxW19jGZTOL/C7DmAnN/jsH2rrDHGrttOM3xnjMAgfn+huwWxqM8ZoFDtbLnzHJYoGDc/wmmJnGjMk5fpdizIRmgYPTwpldlkd6mvhdijETmgUOfc+hst0pY7yW8oFzoqOXxpPdFjjGJEHKB05dk3PAeK4FjjGes8Bxz1BV2TU4xnjOAudIiNysdKZPyva7FGMmPAucxjaqpuQjYmeojPGaBU5jyI7fGJMkKR04R9u6aWnvseM3xiRJSgdO3wFj6+XPmORI6cCpb2wD7B4qY5IlpQOntjFEYXYmZfkBv0sxJiWkdODUN4aYMyXPzlAZkyQpGziqSu2REFW2O2VM0qRs4DSFujnZFbZT4sYkUcoGTt9jYeyUuDHJk7KBc+qUuLVwjEmalA6c
4twsivPsDJUxyeJp4IjIUhGpFZFdIrJqkGVuE5EdIrJdRH4eMz8iIlvdYW2ia6trbLPrb4xJsmE/lypeIpIOPAJcBzQAm0RkraruiFmmCrgfuEpVj4tIWcxHdKrqAi9qU1XqG0N8/LJyLz7eGDMIL1s4i4FdqrpHVXuANcAt/Zb5LPCIqh4HUNUmD+s55VBrJ+09EebYLQ3GJJVnLRxgOnAwZroBuLzfMnMARORPQDrwTVXte4RwUERqgDDwsKo+2/8LRGQlsBKgoqIi7sLssTBjUDQCvZ0Q7oLeDme8bwi7r9EIBPIhUADBgtOvGUFIxsWb4R7oaYPuk9AdcobeDlAFjTr1adQd3PFodIB5scvFDAikpYOkOT+P9I2nxczvN5wx3x3XiLsdu5xtF+4+vW2HO/+2n0BF/z/bkfMycOL9/irgGqAceEVE5qtqKzBTVQ+JyPnAiyLyjqrujl1ZVR8HHgeorq7WeL+0ru8eqjILnISKRuDkYWg9ACcOOq+t+6HrhPPLHBse/cMk0jPy703LiAmhfAgUng6kQH6/8ULnNdJzOjRiA6S7bZD5IYh0J25b+SU9ywnojCBkBiEjGzICkJntzAsWnjk/e1JCv97LwDkEzIiZLnfnxWoA3lDVXmCviNThBNAmVT0EoKp7ROQlYCGwmwSoOxJiSkGAwpzMRHxc6oiEIeQGSusBaI0JldYDcPIQRMNnrpM3BbInO7/QmdmQXQT550FmjvOLnZnj/vL3n84+PWS4r2npzh9+V18QnHDHT8bMP+m8th6MeT/k/K9/LukBN6zyT7eiCqbHTOednh+7XEb22a2Ps1odMnhrRNIgLQ0QQM/dWhqsxXRqeXddSXNDI3h6W2YEnPG0dK9+O+LiZeBsAqpEZBZO0CwDbu+3zLPAcuBHIlKCs4u1R0SKgA5V7XbnXwX8U6IKq2uyx8IMSBU6WqBlF7TsPrulcuJQvz9cccJj0gyYsRgmVZweCiugsNz5xfebqrPrExtOGYHTAZKVBxlZfleZEjwLHFUNi8jdwO9xjs+sVtXtIvIQUKOqa933rheRHUAEuE9VW0Tk/cD3RSSKc2D74dizW6MRiSr1jW2suGJmIj5ufOppdwKlL1hadrlDvbP7c4pAwTQnQGZcAfMr+oVKufOHO9aJQFauM3Ce39WkNE+P4ajqOmBdv3kPxIwr8GV3iF3mNWC+FzUdPNZBdzg68a8wjoSdVknLbidIToXKbmfXJ1ZBORRfAPM/AcWznWHy+VA4w/7nNwnl90HjpKudiI+F6e2Egxth36vQuB2O1sPxvWceTwlOgpIqmPUBJ1yKq04HS5Y9U90kR8oFTv2pwBnHLZxILxzaAntfgb0vO2ET6XYORJbOhbJ5MO+vT7dWSqogZ7LfVRuTeoFT29jG9EnZ5AXG0Y8ejcCRt92AeRX2vwa97YDA1Pmw+LNOy6XiSucUsDFj1Dj6q0uM+sbQ2O80XRWad7oB84qzq9R3MLdkLiy43QmYyqut5WLGlZQKnN5IlN3NbXxwbqnfpZxJFY7tcYKlL2Tam533Js2EeTfDrA/CrL+E/Kn+1mrMKKRU4Oxvaac3omPnDNWxPbDlJ/DOfzrXu4BzXcsFf+W2YP4SilL49L2ZcFIqcM4rzObJTy/iovN8PM4R7oadz8HmHzsHfCUNZn8Yrv6S04opnp2c+4KM8UFKBU5uIINr5pYNvaAXmutgy4/hraecq3kLK+Daf4AFn4LC6f7UZEySpVTgJF1vJ2x/1gmaA687NxnOvREuuwPOv9b3+1qMSTYLHC8cecfZZXr7aecGwskXwIcfdM4u5fnUwjJmDLDASZTuNtj2S9j8JBze4tx9fNHNcOkdzulrOy5jjAXOqKg64bL5x07Y9LRB6TxY+jC875N2jYwx/VjgjNTuF2H9A9D4jtOHy8Ufc47NlC+y1owxg7DAGYlILzz7v52uGW76tnOXtd1SYMyQLHBGYufvIPQeLP8FzF3qdzXGjBsp
+yC8Udn4hHPLQdV1fldizLhigTNcjdth/3/DorvsOhpjhskCZ7g2PuF0Rr1whd+VGDPuWOAMR2crvP0LmP9xO+VtzAhY4AzHW085vf8v+qzflRgzLlngxCsadXanZlwO0zx55LkxE54FTrz2vAjHdlvrxphRsMCJ18YfQG4ZXHSL35UYM25Z4MTj+D6oex4uu9Oe02TMKFjgxGPTD52e+ao/7XclxoxrFjhD6e2EN38C8z7iPPbWGDNiFjhD2fZL6DwOi1f6XYkx454Fzrmowhvfh7KLYOZVfldjzLjnaeCIyFIRqRWRXSKyapBlbhORHSKyXUR+HjP/DhGpd4c7vKxzUA2bnCdeLrrL+rgxJgE8655CRNKBR4DrgAZgk4isVdUdMctUAfcDV6nqcREpc+dPBr4BVAMKbHbXPe5VvQPa+DgECpze+4wZRG9vLw0NDXR1dfldiueCwSDl5eVkZmaOaH0v+8NZDOxS1T0AIrIGuAXYEbPMZ4FH+oJEVZvc+UuAF1T1mLvuC8BS4CkP6z1TW5PzxIVFd0EgL2lfa8afhoYG8vPzqaysRCZwS1hVaWlpoaGhgVmzZo3oM7zcpZoOHIyZbnDnxZoDzBGRP4nIn0Vk6TDWRURWikiNiNQ0NzcnsHScfoqjvU7gGHMOXV1dFBcXT+iwARARiouLR9WS8/ugcQZQBVwDLAeeEJFJ8a6sqo+rarWqVpeWJvB54ZFeqFntPHK3ZHbiPtdMWBM9bPqM9uf0MnAOATNipsvdebEagLWq2quqe4E6nACKZ13v7PwdhA7bqXAzLrS2tvK9731v2OvdeOONtLa2elDR4LwMnE1AlYjMEpEsYBmwtt8yz+K0bhCREpxdrD3A74HrRaRIRIqA6915ybHpBzCpAqquT9pXGjNSgwVOOBw+53rr1q1j0qS4dygSwrODxqoaFpG7cYIiHVitqttF5CGgRlXXcjpYdgAR4D5VbQEQkW/hhBbAQ30HkD3XuAP2veo8KdO6EDXjwKpVq9i9ezcLFiwgMzOTYDBIUVERO3fupK6ujltvvZWDBw/S1dXFPffcw8qVTsu9srKSmpoa2trauOGGG7j66qt57bXXmD59Or/5zW/Izs5OeK2iqgn/UD9UV1drTU3N6D/ouXth68/hy+9ar34mLu+++y7z5s0D4MHfbmfH4ZMJ/fyLphXwjb++eND39+3bx0c+8hG2bdvGSy+9xE033cS2bdtOnUk6duwYkydPprOzk0WLFvHyyy9TXFx8RuDMnj2bmpoaFixYwG233cbNN9/MihUDd6Mb+/P2EZHNqlo91M9ij4mJ1XUC3voFXGJdiJrxa/HixWectv7ud7/Lr3/9awAOHjxIfX09xcXFZ6wza9YsFixwOpa77LLL2Ldvnye1WeDE2voU9LbDYutky4zMuVoiyZKbm3tq/KWXXuIPf/gDr7/+Ojk5OVxzzTUDntYOBAKnxtPT0+ns7PSkNr9Pi48d0ShsesJ5VK91IWrGkfz8fEKh0IDvnThxgqKiInJycti5cyd//vOfk1zdmayF02fPBmjZBR97wu9KjBmW4uJirrrqKi655BKys7OZMmXKqfeWLl3KY489xrx585g7dy5XXHGFj5XaQePTnlru3Kx573bnmeHGxGmgg6gT2WgOGtsuFcDx/VD7X24XohY2xnjFAgegxu1C9DLrQtQYL1ng9HbClv8HF94EhWfdH2qMSSALnG2/si5EjUmS1A4cVdj4fSidB5VX+12NMRNeagdOQw289xYsti5EjUmG1A6cU12ILvO7EmNGbKTdUwB85zvfoaOjI8EVDS51A6etCXY8Cwtuty5Ezbg2ngInda803vJjiPRYF6Jm3IvtnuK6666jrKyMp59+mu7ubj760Y/y4IMP0t7ezm233UZDQwORSISvf/3rNDY2cvjwYa699lpKSkrYsGGD57WmZuBEwrBpNZx/LZRU+V2NmUj+axUceSexnzl1Ptzw8KBvP/zww2zb
to2tW7eyfv16nnnmGTZu3IiqcvPNN/PKK6/Q3NzMtGnT+N3vfgc491gVFhby7W9/mw0bNlBSUpLYmgeRmrtUtdaFqJmY1q9fz/r161m4cCGXXnopO3fupL6+nvnz5/PCCy/w1a9+lVdffZXCwkJf6kvNFs7GJ6CwAuYs8bsSM9GcoyWSDKrK/fffz+c+97mz3tuyZQvr1q3ja1/7Gh/60Id44IEHkl5f6rVwmt51uhBd9LfWhaiZEGK7p1iyZAmrV6+mra0NgEOHDtHU1MThw4fJyclhxYoV3HfffWzZsuWsdZMh9Vo4G5+A9AAs/Bu/KzEmIWK7p7jhhhu4/fbbufLKKwHIy8vjpz/9Kbt27eK+++4jLS2NzMxMHn30UQBWrlzJ0qVLmTZtWlIOGqdW9xRdJ+Bf58HFt8KtIzuNaEx/1j2FdU8xsFAjlF1op8KN8Ulq7VKVzoHPvuh3FcakrNRq4RhjfGWBY0wCTJRjoUMZ7c9pgWPMKAWDQVpaWiZ86KgqLS0tBIPBEX9Gah3DMcYD5eXlNDQ00Nzc7HcpngsGg5SXl494fQscY0YpMzPzjCddmsHZLpUxJmkscIwxSWOBY4xJmglza4OINAP7h1isBDiahHKGw2oa2lirB6ym/maqaulQC02YwImHiNTEc79HMllNQxtr9YDVNFK2S2WMSRoLHGNM0qRa4DzudwEDsJqGNtbqAatpRFLqGI4xxl+p1sIxxvgoJQJHRJaKSK2I7BKRVT7VMENENojIDhHZLiL3uPMni8gLIlLvvhb5UFu6iLwpIs+507NE5A13e/1CRLKSXM8kEXlGRHaKyLsicqXf20lE7nX/3baJyFMiEkz2dhKR1SLSJCLbYuYNuF3E8V23trdF5FIva4vXhA8cEUkHHgFuAC4ClovIRT6UEgb+XlUvAq4AvuDWsQr4o6pWAX90p5PtHuDdmOl/BP5NVWcDx4HPJLmefweeV9ULgb9wa/NtO4nIdODvgGpVvQRIB5aR/O30JLC037zBtssNQJU7rAQe9bi2+KjqhB6AK4Hfx0zfD9w/Bur6DXAdUAuc5847D6hNch3lOL+ofwU8BwjOxWMZA22/JNRTCOzFPb4YM9+37QRMBw4Ck3FueH4OWOIPoSoAAAAE4klEQVTHdgIqgW1DbRfg+8DygZbzc5jwLRxO/7L0aXDn+UZEKoGFwBvAFFV9z33rCDAlyeV8B/gKEHWni4FWVQ2708neXrOAZuBH7m7eD0QkFx+3k6oeAv4FOAC8B5wANuPvduoz2HYZc7/3kAK7VGONiOQBvwS+pKonY99T57+ipJ02FJGPAE2qujlZ3xmHDOBS4FFVXQi002/3yYftVATcghOG04Bczt618V2yt8tIpELgHAJmxEyXu/OSTkQyccLmZ6r6K3d2o4ic575/HtCUxJKuAm4WkX3AGpzdqn8HJolIX19Jyd5eDUCDqr7hTj+DE0B+bqcPA3tVtVlVe4Ff4Ww7P7dTn8G2y5j5vY+VCoGzCahyzyhk4RzsW5vsIkREgB8C76rqt2PeWgvc4Y7fgXNsJylU9X5VLVfVSpzt8qKqfgrYAHzcp5qOAAdFZK4760PADnzcTji7UleISI7779hXk2/bKcZg22Ut8Dfu2aorgBMxu17+8fsgUjIG4EagDtgN/INPNVyN09x9G9jqDjfiHDP5I1AP/AGY7FN91wDPuePnAxuBXcB/AoEk17IAqHG31bNAkd/bCXgQ2AlsA34CBJK9nYCncI4h9eK0BD8z2HbBOfj/iPs7/w7OGbak/171H+xKY2NM0qTCLpUxZoywwDHGJI0FjjEmaSxwjDFJY4FjjEkaCxxjTNJY4BjPicgCEbkxZvrmRHUTIiJfEpGcRHyW8Z5dh2M8JyJ34lx4drcHn73P/ey4H48iIumqGkl0LWZo1sIxp4hIpdvh1RNuZ1PrRSR7kGUvEJHnRWSziLwqIhe68z/hdlL1loi84t5O8hDwSRHZKiKfFJE7
ReQ/3OWfFJFHReTPIrJHRK5xO5p6V0SejPm+R0Wkxq3rQXfe3+HcTLlBRDa485aLyDtuDf8Ys36biPyriLwFXCkiD4vTGdrbIvIv3mxRcxa/L3W2YewMOH2thIEF7vTTwIpBlv0jUOWOX45zHxY4l9FPd8cnua93Av8Rs+6paZxOpdbgXIp/C3ASmI/zn+HmmFr6LtlPB14C3udO7wNK3PFpOPc9leLcdf4icKv7ngK3uePFOP3DSGydNng/WAvH9LdXVbe645txQugMbhcb7wf+U0S24nT2dJ779p+AJ0XkszjhEI/fqvOX/w7QqKrvqGoU2B7z/beJyBbgTeBinN4b+1sEvKTOXd1h4GfAB9z3Ijh36oPTn00X8EMR+RjQEWedZpQyhl7EpJjumPEIMNAuVRpO51ML+r+hqp8XkcuBm4DNInLZML4z2u/7o0CGiMwC/g+wSFWPu7tawTg+N1aXusdtVDUsIotx7vr+OHA3TtccxmPWwjHDpk7HYXtF5BNwqsPuv3DHL1DVN1T1AZye+2YAISB/FF9ZgNMR1wkRmYLTX2+f2M/eCHxQRErcvqyXAy/3/zC3hVaoquuAe3H6TTZJYC0cM1KfAh4Vka8BmTjHYd4C/llEqnCOyfzRnXcAWOXufv3f4X6Rqr4lIm/idA9xEGe3rc/jwPMiclhVr3VPt29wv/93qjpQHzX5wG9EJOgu9+Xh1mRGxk6LG2OSxnapjDFJY7tU5pxE5BGc/ntj/buq/siPesz4ZrtUxpiksV0qY0zSWOAYY5LGAscYkzQWOMaYpLHAMcYkzf8HNPeptBYF2asAAAAASUVORK5CYII=\n",
- "text/plain": [
- ""
- ]
- },
- "metadata": {
- "needs_background": "light"
- },
- "output_type": "display_data"
- }
- ],
- "source": [
- "steps = np.arange(1, MAX_ESTIMATORS, 10)\n",
- "\n",
- "rfc = RandomForestClassifier(warm_start=True,\n",
- " **params)\n",
- "\n",
- "%time train_scores, test_scores = inc_fit(x, y, rfc=rfc, steps=steps)\n",
- "print(f\"With {len(rfc.estimators_)}: {train_scores[-1]} | {test_scores[-1]}\")\n",
- "plot_auc(steps, train_scores, test_scores)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "#### As normal random forest\n",
- "1 estimator per full subset"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 15,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "CPU times: user 24.4 s, sys: 0 ns, total: 24.4 s\n",
- "Wall time: 24.6 s\n",
- "With 119: 0.82665000402489 | 0.6500849442174572\n"
- ]
- },
- {
- "data": {
- "image/png": "iVBORw0KGgoAAAANSUhEUgAAARwAAAELCAYAAAALJznDAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAIABJREFUeJzt3Xl8VPW9//HXJ5NJJntIwr4IQkAQCljcl7qL2uty21q0/VVvF3pva2vbX+3VX1trbX+/a2+Xa3trbe0ttffeVq+1i2hxwQpqXQkIskNYhCRAQsi+zvL5/fE94BACJDBnJsl8no/HPDJz5iyfHJh3vud7zvmOqCrGGJMMGakuwBiTPixwjDFJY4FjjEkaCxxjTNJY4BhjksYCxxiTNBY4xpikscAxxiSNBY4xJmkyU11AopSVlenEiRNTXYYxaWnlypX7VXX48eYbMoEzceJEKioqUl2GMWlJRN7ty3x2SGWMSRoLHGNM0ljgGGOSxgLHGJM0FjjGmKSxwDHGJI0FjjEmaYbMdTjGmL6LxZT2cJTOcBRViMRi7G/ppqUrTCSqNHeGOdDWzfzTRzGiMJSw7VrgGDMAdEWiVDd0sLe5k+5IjLL8bELBDFQhFAyQn51JXnYmWZnuoCQSjdHYEWbH/jZqm7voikRp747S1hWhtStCY3uYvc2dNHeECUdj5GZlkpcdoCMco66li+11rXRFYseta1JZngWOMakSicbY39rN7oZ2ojElGMggpkpWIIPxJbkMyw0iIgC0d0fY39LNgfZuGtu7CUeVSDRGfZtbfn11M3ubO2ntjLCvpZO+fJ+BCAREiMSOPrMIFIaCjCoMUZQbJDcrk/buCLUtneQEA4wqzOb8yaWMKMwmFAwgIgREKMvPojAnSDAgFISCDMvNYlhuMFG7DrDAMeaQfc2drK1qoqqhndqWLho7wtQ2d1LT2Mn+1i4a28N0R4/dKsjOzGBYbhYtnWHauqNHnS8YEKaNKqB8RD752ZmMKc5hQkkuo4tCZGVmsL+1i3DUhUpHOEprZ4S2rgjd0RjRmBIKBigIZTKxNI/RxSFCmQFyswLkZWeSmxU4FHoDjQWOGdSiMaWlM0xdSxc1TZ3sqGulrTtKIENo747S3BGmvq2b7kiUwlCQFq81AZCZIURjSnt3lP2t3exv7Tq03kCGUJQTZERBNmOKc5g1tojivCB5WZkMy8tiQkkuwYDQHYmRmZFBe3eEXQdcUB1o66YwFKSsIIvh+dmU5GVRnBskOzNAhggleVmU5mcRDKTfORsLHDOgqSo79rexYucBNu1tYW9TJwWhTJo6wqza1UhdS9cxly/IzqQ0P4uszAyaOyLkhzIZVRhCxIVVdqZQkpfN7HHFTB1VwJzxRUwoyaM0L4uMjIHZShjMLHBMyqkqlbWtvLJ1P7sOtNPQ3k1AhObOCKt3Nx5qeeQEA4wuDtHWFSEnGODCKWWML8mlIJTJ8IJsRhWGmDQ8j6KcoBcmAQIWGgOKBY5JOlVla20rT66u5uUt+9lZ30ZLZwRwLZKS/CwvMDK4aGoZ7z9lGGdPKuXUsjxrdQxyFjjGV6rKnqZODrR1s+tAOyvfbWDZplq2728jQ+CsSSXcOHcs00cXctHU4Ywtzkl1ycZHFjjmpDS1h9nT3MGBtm72NHbybn0bm/a20N4dpTAnkzW7m6hu7Dg0f1ZmBmdNLOEfzp/I/JmjGV6QncLqTbJZ4Jjj6uiOUt3oQqWxvZvali7WVTexalcDW/a1HjZvhriLxQpCQaoa2pkxppDPfuBURhaGGFUY4rTRBWRnBlL0m5hUs8AxR2hs7+btXY28tm0/b+44wPqaZqI9LjQrzg3yvnHFXDd7DKcOz6c4J8ioohBjinMIBS1QTO8scNJUVyRKfWs3HeEoTR1h1lU3sXpXI6t2NbCzvh2ArEAGcycU808fmEz5yHx3PUlOFiX5WYwpCg3Yi8vM
wGWBk2be3tXAPU+uZ2110xHvleVnc8aEYj565gRmjyti7oRh5GRZa8UkjgVOGuiOxFi8poYla/ewbHMtIwtCfOnyckYWhsjNCpCblcmMMYXWajG+s8AZ4l6r3M83n1zHtro2xhbn8LmLJ/NPF08hP9v+6U3y2f+6ISgSjfFK5X5+8dI23th+gPElOfzq1nlcetoIa8GYlPI1cERkPvBjIAD8h6re3+P9CcBvgGJvnrtUdYn33t3Ap4Ao8EVVfc7PWge77kiMl7bU8eKmWl7YuI+6li5GFGRzzwdncMvZE+zMkRkQfAscEQkADwJXAFXAChFZrKob4mb7BvC4qj4kIjOAJcBE7/kC4HRgDPCCiExV1aPf75+mqhra+e2bu/h9xW72t3aTn53JBVPKuGHuWC49bcShAZuMGQj8bOGcBVSq6nYAEXkMuB6IDxwFCr3nRUCN9/x64DFV7QJ2iEilt77Xfax30NjT1MHDL2/n7V2NvFPVCMBl00dyy1kTOH9KmYWMGbD8DJyxwO6411XA2T3muRd4XkS+AOQBl8ct+0aPZcf6U+bgsmTtHu7+41o6wlHmjC/m85dMYcFZE+weJDMopLrT+GbgEVX9oYicC/yXiMzs68IishBYCDBhwgSfShwYVJUHXtjKj/+6ldnji3ngo3OYVJaX6rKM6Rc/A6caGB/3epw3Ld6ngPkAqvq6iISAsj4ui6o+DDwMMG/evD6MCDs4Vda28L1nN7N0wz4+/P5x/L8bZ9lhkxmU/AycFUC5iEzChcUC4JYe8+wCLgMeEZHpQAioAxYDvxORH+E6jcuBt3ysdcBp7Yrw2Fu7WLphHyt2HiAnGODuq09j4UWn2qltM2j5FjiqGhGR24HncKe8F6nqehG5D6hQ1cXA/wZ+KSJfxnUg36aqCqwXkcdxHcwR4PPpcoZKVXm8Yjfff24L+1u7OG1UAbdfWs5t502kJC8r1eUZc1JE+/LdFIPAvHnztKKiItVlnJTdB9q5+49r+VvlfuadMoyvXzuduROGpbosY45LRFaq6rzjzZfqTmMDdIajPPzydh5cVklmhvDdG2bysbMn2KGTGXIscFKkMxxl675W1lQ18rNlldQ0dXLNrFF8/doZdorbDFkWOEnW0hlm0d928shrO2hoDwMwa2wRP7hpNudNLktxdcb4ywInibbXtfKZ/6xgW10bl08fwY1zxzFlRD7lI/Lt2whMWrDASZJn1+3lzifWEAxk8OhnzuHcyaWpLsmYpLPA8Zmq8p2nN7Lo1R28b1wRD95yBuNLclNdljEpYYHjs5++WMmiV3dw67mn8PVrZ9gVwiatWeD46Nl1e/jh0i3cOHcs9153up3mNmnP/tz6pKaxg6898Q6zxxfzL38/y8LGGCxwfBGLKXc+sYZITPnxR+fYaHvGeCxwEiwaU+59aj2vVtbzzQ/OYKINIWHMIdaHk0CxmHLHY2/z9Dt7+MyFk1hw5vjjL2RMGrHASaDfvvkuT7+zh6/Nn8bnLp6S6nKMGXDskCpB9jZ18r1nN3NheRn/9IHJqS7HmAHJAidB7nt6PeFojO/eMNPOSBlzFBY4CbBi5wGWrN3L5y6ewiml1klszNFY4JykWEz57l82MrIwm4UXnZrqcowZ0CxwTtLTa/ewZncjX71yGjlZdr2NMcdigXMSuiJR/vXZTUwfXcjfnzEu1eUYM+BZ4JyE/3ztXaoaOvg/15xGwMazMea4LHBOUGN7Nz9dVslFU4dzYfnwVJdjzKBggXOCvv/cZlo6w9x99WmpLsWYQcMC5wSs2d3I797axSfOncj00YWpLseYQcMCp59UlXsWr6csP5uvXDk11eUYM6hY4PTTmqom1uxu5IuXlVMYCqa6HGMGFQucfnq8YjehYAY3zBmT6lKMGXQscPqhozvKU6truGbmaAqsdWNMv1ng9MNz6/fS0hXhI/NsnBtjToSvgSMi80Vks4hUishdvbz/byKy2ntsEZHGuPeice8t9rPO
vvrDqirGl+Rw9qSSVJdizKDk2wBcIhIAHgSuAKqAFSKyWFU3HJxHVb8cN/8XgLlxq+hQ1Tl+1ddfdS1dvFq5n89dPMW+JdOYE+RnC+csoFJVt6tqN/AYcP0x5r8ZeNTHek7KM+v2EFP4u9nWWWzMifIzcMYCu+NeV3nTjiAipwCTgBfjJodEpEJE3hCRG46y3EJvnoq6urpE1d2rp9bUMG1kAdNGFfi6HWOGsoHSabwAeEJVo3HTTlHVecAtwAMicsS4nar6sKrOU9V5w4f7dz9TdWMHK3Y28HezR/u2DWPSgZ+BUw3En84Z503rzQJ6HE6parX3czuwnMP7d5LqmbV7ADucMuZk+Rk4K4ByEZkkIlm4UDnibJOInAYMA16PmzZMRLK952XA+cCGnssmy7Pr9jJjdKENH2rMSfItcFQ1AtwOPAdsBB5X1fUicp+IXBc36wLgMVXVuGnTgQoRWQMsA+6PP7uVTLUtnazc1cBVp49KxeaNGVJ8/V4qVV0CLOkx7Z4er+/tZbnXgFl+1tZXSzfsQxXmz7TAMeZkDZRO4wHr2XV7mViay9SR+akuxZhBzwLnGJo6wry+rZ6rZo6y75oyJgEscI7hb1v3E4kpl08fmepSjBkSLHCO4aUttRSGMpk7vjjVpRgzJFjgHIWq8tKWOi4sH05mwHaTMYlgn6Sj2LinhX3NXXxgqn0jgzGJYoFzFC9tcfdmfWCaBY4xiWKBcxTLN9cyfXQhIwtDqS7FmCHDAqcXHd1RVu1q4KLyslSXYsyQYoHTi4p3DxCOKudOLk11KcYMKRY4vXhtWz2ZGcKZE20oUWMSyQKnF69tq2fO+GLysn291cyYtGOB00NzZ5i1VY2cZ4dTxiScBU4Pb20/QEzh3MnWYWxMolng9PDG9nqyMjOYO8FuZzAm0SxweninuonTxxQSCgZSXYoxQ44FTpxYTNlQ08yssUWpLsWYIckCJ86O+jZauyLMtMAxxhcWOHHWVTcBWAvHGJ9Y4MRZW9VEdmYG5SNsOFFj/GCBE2dtdRPTRxfa+DfG+MQ+WZ5YTFlvHcbG+MoCx7PT6zC2wDHGPxY4ng17mgGYMaYwxZUYM3RZ4Hi27G0hkCFMsQ5jY3xjgePZtLeFiaW5doWxMT6ywPFs2dfCtFEFqS7DmCHN18ARkfkisllEKkXkrl7e/zcRWe09tohIY9x7t4rIVu9xq591dnRHefdAO9NGWv+NMX7ybYQpEQkADwJXAFXAChFZrKobDs6jql+Om/8LwFzveQnwLWAeoMBKb9kGP2rdWtuCKkwbZf03xvjJzxbOWUClqm5X1W7gMeD6Y8x/M/Co9/wqYKmqHvBCZikw369CN+9tAWDqSDukMsZPfQocETlHRAriXheKyNnHWWwssDvudZU3rbf1nwJMAl7s77KJsGVfC9mZGZxSmufXJowx9L2F8xDQGve61ZuWKAuAJ1Q12p+FRGShiFSISEVdXd0Jb3zT3hbKR+YTyJATXocx5vj6GjiiqnrwharGOH7/TzUwPu71OG9abxbw3uFUn5dV1YdVdZ6qzhs+/MS/IXPLvhY7nDImCfoaONtF5IsiEvQedwDbj7PMCqBcRCaJSBYuVBb3nElETgOGAa/HTX4OuFJEhonIMOBKb1rCtXVF2NfcxeTh1mFsjN/6Gjj/CJyHa2VUAWcDC4+1gKpGgNtxQbEReFxV14vIfSJyXdysC4DHerSgDgDfwYXWCuA+b1rC7WnqAGDcsBw/Vm+MidOn0+KqWosLhn5R1SXAkh7T7unx+t6jLLsIWNTfbfZXdWMnAGOLLXCM8VufAkdEfo27HuYwqvrJhFeUZDWNroUzxgLHGN/19cK/p+Oeh4AbgZrEl5N8NY0dBDKEEQXZqS7FmCGvr4dUf4h/LSKPAn/zpaIkq27sYFRhyEb5MyYJTvRTVg6MSGQhqVLT2MGY4lCqyzAmLfS1D6eF9/pwFNgHfM2vopKpprHTvmXTmCTp6yFVgXdD
ZTmuDwd66UQebGIxZU9TB9cWj051Kcakhb62cD4N3IG74nc1cA7uQr1L/SvNf/tbuwhH1c5QGZMkfe3DuQM4E3hXVS/BDSPReOxFBr5q75T4WOvDMSYp+ho4naraCSAi2aq6CZjmX1nJUeNd9GctHGOSo6/X4VSJSDHwZ2CpiDQA7/pXVnLYRX/GJFdfO41v9J7eKyLLgCLgWd+qSpLqxg4KsjMpDAVTXYoxaaHfQ4yq6kt+FJIKNY0djLb+G2OSJq0vr93X3MnIQgscY5IlrQOntqWLEQUWOMYkS9oGTiym1LV0MaLQbto0JlnSNnAa2ruJxNTuEjcmidI2cGpbugDskMqYJEr7wBlph1TGJE3aBs6+ZneVsbVwjEmetA2cuoOHVNbCMSZp0jZwaps7KQhlEgoGUl2KMWkjfQOnpcvOUBmTZGkeONZ/Y0wypXHgdFr/jTFJlpaBo6rUNtshlTHJlpaB09wZoSsSs0MqY5IsLQOnrsW7BscOqYxJqrQMnNpmdw3OcDukMiapfA0cEZkvIptFpFJE7jrKPDeJyAYRWS8iv4ubHhWR1d5jcSLrsvuojEmNfo/411ciEgAeBK4AqoAVIrJYVTfEzVMO3A2cr6oNIhL/bZ4dqjrHj9r2t1oLxwxBqtDRAM3V0NkMecMhVAgZQQgVQcC3j3uf+VnBWUClqm4HEJHHgOuBDXHzfAZ4UFUbAFS11sd6DmnqCJMhUJCd+n8Ak8ZUoXolbPgzNFVBVytk5UF2gXtIBnS3wb710LIXisdD2VQYeTq01UF9JSDQ3QoHtkPjboh09L4tyYDcUoh2u+cFo92jcAwUjYf84RANu7Dat97VUToF5nwMSicn7Ff28xM3Ftgd97oKOLvHPFMBRORVIADcq6oHB2cPiUgFEAHuV9U/99yAiCwEFgJMmDChz4U1d4QpCAXJyJA+L2PSXLgD2uuhYAxkZEB3O2RkQiDoWhXdrZBTAoEsiIWho9GFSM0qqN8G7fuhcRc0VXPoS2u729xygWwXJln50PgudLW48EHd+oafBhPOdoGy/o+w8tcuNIrGQ0YAMnNcEJVfCUXjXJCECqFtv1tXNOy237YfMrMhFoHmPdBSA/vWQeu+937PQBYMn+Z+301/ganzB03g9HX75cDFuG/1fFlEZqlqI3CKqlaLyKnAiyKyVlW3xS+sqg8DDwPMmzevz1893NQRpjAn1b+6SYpoGGpWQ3MVBPPcX/iuZveBbt0Le95xH8DSKe7Dmp0P+7dC3Wb34Y90u4BpqgKNQVYBBEOuhQEgAdDosWsIFUFumVt/+RUuJFRdK2LEdJhxvZunL1RdKySnBLJyT27fHBTuhI4DLvhChS5Ewe07EvtH2c9PXTUwPu71OG9avCrgTVUNAztEZAsugFaoajWAqm4XkeW4b/vcRgI0d0YoyrGvhhmQGnfDu6+6Q4qcEveXPNLhAqJonPur3lIDrbWuddDR6D4s7fXQ3uDCJBYFERcS+7dCuK33bUnAtR4ys+Cdx6GryU0P5sHwqTB6NgRz3Qdv9iTXJ1K3GSKdMGwioK6lkz/ChUdHg5s3IxNyiiF/lFtHYQK/u17E7YdECoYgOObI6YHEf0b8DJwVQLmITMIFzQLglh7z/Bm4Gfi1iJThDrG2i8gwoF1Vu7zp5wP/mqjCmjvC9l1UqVa7CTqbXHhsWAz1W91f2vqtJ7a+QDbklkC29xdaY1AwCs74XzDhHHfI0d3uwiW70OsnKXSvD+pqcZ2tBaNdq8YknG+Bo6oREbkdeA7XP7NIVdeLyH1Ahaou9t67UkQ2AFHgTlWtF5HzgF+ISAx36v7++LNbJ6upI8zk4fmJWp3pq1gMqitg+b/Athffm55bCmPnuQ//nFtg6lUQ6YLORhccmTkQzHF9IM01rqMzf6Q7/AkVQ84w18KQk2z+H+ysNb7xtSNDVZcAS3pMuyfuuQJf8R7x87wGzPKrrubOsB1S9YeqCwBwrYeuZtj8jOsALZnk
DmGadrn+kPZ6OPUSGDXzvUOb7jbYvhy2LoW2WhcQV3zHnW3Jyoex7+/bKduxZ/j6axr/pWXPaXNHxDqN+yIadn0bL38fGnYcf/5hk1yn47LvHvleqBgmX+paL9Ou7nsnqRlS0u5T1x2J0RGOWh/OsbTWwis/hLW/dy2WUe+DS7/hOnCjEfdzyqUwfLo7k5MRdB2jWXlu+ZZ97qyQBAB17w8/bUBceGZSK+3+BzR3hgEoyrXAQdUd8qhC9SrYuwbaD8Br/+4Og0671l34VX7F0ftHRkw/clrBSPcwpof0C5wOFzhp3cKJxaDiV7Ds/7pTwMGcw88OjT8HrvuJuwDMmARKv8DpjAAMrT6chnddaOSPOPK9pip3pWtXCxSNdVeYvvx9dwXsxAvdqeO2/XDu511LJjPHnV4+2TM+xvRiCH3q+qbJa+EM6rNU+zbAxqfcBW/Vq6DqLdevMvECdz1KtBtOOc/dX7P29+7UcrziCXDDQzD7ZgsWk1RpFziD4pDqYN9KTzVvw9JvwY6XAHEXrhVPgMvvdS2Yzc+4e2E0Bsvvh8yQa7lMne86dJuq3HqmzvflKlJjjif9AsfrNC4cqC2cmrfhiU+6C9uu+I47S7TzFdi7Fna87C6Su/xeOONWd+gT77J73nvefsC1enKK35s2Zm4yfgNjjirtAmfAHVKFO9xp465mWPEr17+SV+buAfrV5W6ezJDrwL3gy3DBl/p2DUvPMDJmAEi7wGnuiJAVyCA7M4X3yoQ74a1fwPo/wZ417hDo4F3H066F6/7d3cuz9gl3F/Mp57lhBYwZ5NIvcDrd0BSSjM7S+m3uCt0J57q7jlv2wsbF8PqD7oK58WfDBV9x/SnRbpj5YRg5473lz/qM/zUak0RpFzhuLByfDqfaD8BTd7i7jYMheP1nbjCmQJYbsiDc7uYbPdtd53Lqxf7UYcwAlXaB49vQFJFuePwTsOsNFy6RDpj1EZh1k+v01Zi7y3nK5XZBnUlb6Rc4fg2+teSrLlhufNiN4NZW605ZA0y9MvHbM2YQSrtRhpo7fBiaYtNfYNVv4PwvweyPusOp4r6PsWxMukjLwCkMJbBh11rn+m1GvQ8u+Xri1mvMEJRWh1Sq6p2lOskWjqo7y7T5WXjlB25Yyk8sPny4SmPMEdIqcDrCUcJRPblDqu52WHSlu/IX4JQL4MrvHH462xjTq7QKnOYO707xkzlL9eoDLmwu/7YbwW7ULLsB0pg+Sq/AOXQf1Qn+2g074W8PwMwPuVsMjDH9klaBM2V4PmvuuZLsYD/7yjctgcVfcN9eGMx1N1UaY/otrQInI0P6P7Royz548nPu6uEzPw1TLnMDWRlj+i2tAqffVN0p73AHfOQ37tsYjTEnLO2uw+mXd1+DLc+462ssbIw5aRY4x1LxKzf2zJmfTnUlxgwJFjhH01rrvvN69i2QlZvqaowZEixwjubt/3JDS8z7ZKorMWbI8DVwRGS+iGwWkUoRueso89wkIhtEZL2I/C5u+q0istV73Opnnb1a/aj7GhXruzEmYXw7SyUiAeBB4AqgClghIotVdUPcPOXA3cD5qtogIiO86SXAt4B5gAIrvWUb/Kr3ME3V7ovh5v1DUjZnTLrws4VzFlCpqttVtRt4DLi+xzyfAR48GCSqWutNvwpYqqoHvPeWAvN9rPVwO19xPyddlLRNGpMO/AycscDuuNdV3rR4U4GpIvKqiLwhIvP7sax/drwMOSUw4vSkbdKYdJDqC/8ygXLgYmAc8LKIzOrrwiKyEFgIMGFCgga8UnWBM/EC980JxpiE8fMTVQ2Mj3s9zpsWrwpYrKphVd0BbMEFUF+WRVUfVtV5qjpv+PDhiam6YSc07bbDKWN84GfgrADKRWSSiGQBC4DFPeb5M651g4iU4Q6xtgPPAVeKyDARGQZc6U3z3/bl7qcFjjEJ59shlapGROR2XFAEgEWqul5E7gMqVHUx7wXLBiAK3Kmq9QAi8h1caAHcp6oH
/Kr1kKoKeOFb7svnyux0uDGJJqqa6hoSYt68eVpRUXHiK9hfCQ9/APKGw62LbRB002fhcJiqqio6OztTXYrvQqEQ48aNIxg8fNQFEVmpqvOOt3yqO40HjopFEOmC256GonGprsYMIlVVVRQUFDBx4sTkfKNriqgq9fX1VFVVMWnSpBNah52GAYiGYe3jMPUqCxvTb52dnZSWlg7psAEQEUpLS0+qJWeBA1D5V2irgzkfS3UlZpAa6mFz0Mn+nhY4AKt/C7llUH5Fqisxpt8aGxv52c9+1u/lrrnmGhobG32o6OgscMKdsOVZNzB6wIevADbGZ0cLnEgkcszllixZQnFxsV9l9co6jfeth2g3TLow1ZUYc0Luuusutm3bxpw5cwgGg4RCIYYNG8amTZvYsmULN9xwA7t376azs5M77riDhQsXAjBx4kQqKipobW3l6quv5oILLuC1115j7NixPPnkk+Tk5CS8VgucmlXu55i5qa3DDAnffmo9G2qaE7rOGWMK+dbfHf2+vvvvv59169axevVqli9fzrXXXsu6desOnUlatGgRJSUldHR0cOaZZ/KhD32I0tLSw9axdetWHn30UX75y19y00038Yc//IGPf/zjCf09wAIHat52194U2jcxmKHhrLPOOuy09U9+8hP+9Kc/AbB79262bt16ROBMmjSJOXPmAPD+97+fnTt3+lKbBU7N2651kyZnGYy/jtUSSZa8vLxDz5cvX84LL7zA66+/Tm5uLhdffHGvp7Wzs7MPPQ8EAnR0dPhSW3p3Gne3Qd0mO5wyg1pBQQEtLS29vtfU1MSwYcPIzc1l06ZNvPHGG0mu7nDp3cLZuxY0ZoFjBrXS0lLOP/98Zs6cSU5ODiNHjjz03vz58/n5z3/O9OnTmTZtGuecc04KK033wKl52/20wDGD3O9+97tep2dnZ/PMM8/0+t7BfpqysjLWrVt3aPpXv/rVhNd3UHofUtW8DQVjoGBUqisxJi2kd+DUboSRM1JdhTFpI30DRxUObHdj3xhjkiJ9A6d1H3S3QsnkVFdiTNpI38Cp3+Z+llrgGJMsaRw4le6nBY4xSZO+gXNgGwSyoGj88ec1ZgA70eEpAB544AHa29sTXNHRpW/g1G/CpiL6AAAKTElEQVSDYZMgI5DqSow5KYMpcNL3wr/6bXY4ZYaE+OEprrjiCkaMGMHjjz9OV1cXN954I9/+9rdpa2vjpptuoqqqimg0yje/+U327dtHTU0Nl1xyCWVlZSxbtsz3WtMzcGIxd0q8/PJUV2KGmmfucrfMJNKoWXD1/Ud9O354iueff54nnniCt956C1Xluuuu4+WXX6auro4xY8bwl7/8BXD3WBUVFfGjH/2IZcuWUVZWltiajyI9D6maqyDaZafEzZDz/PPP8/zzzzN37lzOOOMMNm3axNatW5k1axZLly7ln//5n3nllVcoKipKSX3p2cKxU+LGL8doiSSDqnL33Xfz2c9+9oj3Vq1axZIlS/jGN77BZZddxj333JP0+tKzhfPuq+6nXWVshoD44SmuuuoqFi1aRGtrKwDV1dXU1tZSU1NDbm4uH//4x7nzzjtZtWrVEcsmQ/q1cHa9Ca/8CGZcD4VjUl2NMSctfniKq6++mltuuYVzzz0XgPz8fP77v/+byspK7rzzTjIyMggGgzz00EMALFy4kPnz5zNmzJikdBqn11f9ttXDLy5019989iUIpeY41gwtGzduZPr06akuI2l6+337+lW/6XVI1dkIuaXwkUcsbIxJAV8DR0Tmi8hmEakUkbt6ef82EakTkdXe49Nx70Xjpi9OSEGlk+GzL8OYOQlZnTGmf3zrwxGRAPAgcAVQBawQkcWquqHHrP+jqrf3sooOVU18Mthg6cakjJ8tnLOASlXdrqrdwGPA9T5uz5iUGSp9ocdzsr+nn4EzFtgd97rKm9bTh0TkHRF5QkTi76QMiUiFiLwhIjf0tgERWejNU1FXV5fA0o3pu1Ao
RH19/ZAPHVWlvr6eUCh0wutI9Wnxp4BHVbVLRD4L/Aa41HvvFFWtFpFTgRdFZK2qbotfWFUfBh4Gd5YqmYUbc9C4ceOoqqoiHf7ohUIhxo0bd8LL+xk41UB8i2WcN+0QVa2Pe/kfwL/GvVft/dwuIsuBucBhgWPMQBAMBg/7pktzdH4eUq0AykVkkohkAQuAw842icjouJfXARu96cNEJNt7XgacD/TsbDbGDDK+tXBUNSIitwPPAQFgkaquF5H7gApVXQx8UUSuAyLAAeA2b/HpwC9EJIYLxft7ObtljBlk0utKY2OML/p6pfGQCRwRqQPePc5sZcD+JJRzPAOlDhg4tVgdRxootfSljlNUdfjxVjRkAqcvRKSiLymcLnXAwKnF6jjSQKklkXWk171UxpiUssAxxiRNugXOw6kuwDNQ6oCBU4vVcaSBUkvC6kirPhxjTGqlWwvHGJNCaRE4xxuXx+dtjxeRZSKyQUTWi8gd3vQSEVkqIlu9n8OSVE9ARN4Wkae915NE5E1v3/yPd1W43zUUezfrbhKRjSJybgr3x5e9f5d1IvKoiISStU9EZJGI1IrIurhpve4HcX7i1fSOiJzhcx3f9/593hGRP4lIcdx7d3t1bBaRq/qzrSEfOHHj8lwNzABuFpEZSSwhAvxvVZ0BnAN83tv+XcBfVbUc+Kv3OhnuwLuFxPM94N9UdQrQAHwqCTX8GHhWVU8DZnv1JH1/iMhY4IvAPFWdibsifgHJ2yePAPN7TDvafrgaKPceC4GHfK5jKTBTVd8HbAHuBvD+7y4ATveW+Zn3GesbVR3SD+Bc4Lm413cDd6ewnidxg5JtBkZ700YDm5Ow7XG4/8SXAk8DgrugK7O3feVTDUXADrz+w7jpqdgfB4dQKcHd5vM0cFUy9wkwEVh3vP0A/AK4ubf5/Kijx3s3Ar/1nh/2+cHdunRuX7cz5Fs49H1cHt+JyETcXe9vAiNVdY/31l5gZBJKeAD4GhDzXpcCjaoa8V4nY99MAuqAX3uHdv8hInmkYH+oG5HgB8AuYA/QBKwk+fsk3tH2Qyr/H38SeCYRdaRD4AwIIpIP/AH4kqo2x7+n7k+Fr6cLReSDQK2qrvRzO32QCZwBPKSqc4E2ehw+JWN/gBuVADcK5SRgDJDHkYcWKZOs/XAsIvJ1XLfAbxOxvnQInOOOy+M3EQniwua3qvpHb/K+g8NzeD9rfS7jfOA6EdmJG+71UlxfSrGIHBw1IBn7pgqoUtU3vddP4AIo2fsD4HJgh6rWqWoY+CNuPyV7n8Q72n5I+v9jEbkN+CDwMS/8TrqOdAic447L4ycREeBXwEZV/VHcW4uBW73nt+L6dnyjqner6jhVnYjbBy+q6seAZcCHk1jHXmC3iEzzJl2GG+soqfvDsws4R0RyvX+ng7UkdZ/0cLT9sBj4hHe26hygKe7QK+FEZD7u8Ps6VW3vUd8CEckWkUm4Tuy3+rxivzvmBsIDuAbX074N+HqSt30Brln8DrDae1yD6z/5K7AVeAEoSWJNFwNPe89P9f7DVAK/B7KTsP05QIW3T/4MDEvV/gC+DWwC1gH/BWQna58Aj+L6jsK4lt+njrYfcB38D3r/h9fizqz5WUclrq/m4P/Zn8fN/3Wvjs3A1f3Zll1pbIxJmnQ4pDLGDBAWOMaYpLHAMcYkjQWOMSZpLHCMMUljgWOMSRoLHOM7EZkjItfEvb4uUcOEiMiXRCQ3Eesy/rPrcIzvvEvk56nq7T6se6e37j5/nYqIBFQ1muhazPFZC8ccIiITvQGxfukNSvW8iOQcZd7JIvKsiKwUkVdE5DRv+ke8wazWiMjL3u0k9wEfFZHVIvJREblNRH7qzf+IiDwkIm+IyHYRudgbEGqjiDwSt72HRKTCq+vb3rQv4m66XCYiy7xpN4vIWq+G78Ut3yoiPxSRNcC5InK/uEHR3hGRH/izR80RknH5uD0GxwM3JkoEmOO9fhz4+FHm/StQ7j0/G3dv
FrjL7sd6z4u9n7cBP41b9tBr3OBPj+Eu3b8eaAZm4f4Yroyr5eAl/gFgOfA+7/VOoMx7PgZ3f9Rw3F3pLwI3eO8pcJP3vBR3Wb7E12kP/x/WwjE97VDV1d7zlbgQOow31MZ5wO9FZDVucKjR3tuvAo+IyGdw4dAXT6n75K8F9qnqWlWNAevjtn+TiKwC3saNNtfbqI1nAsvV3f19cEiFi7z3org79sGNe9MJ/EpE/h5oP2JNxheZx5/FpJmuuOdRoLdDqgzcIFVzer6hqv8oImcD1wIrReT9/dhmrMf2Y0Cmd1fyV4EzVbXBO9QK9WG98TrV67dR1YiInIW7O/zDwO244TqMz6yFY/pN3QBiO0TkI3BogO/Z3vPJqvqmqt6DG9lvPNACFJzEJgtxA3U1ichI3Pi+B8Wv+y3gAyJS5o2zezPwUs+VeS20IlVdAnwZN66ySQJr4ZgT9THgIRH5BhDE9cOsAb4vIuW4Ppm/etN2AXd5h1//0t8NqeoaEXkbN4zEbtxh20EPA8+KSI2qXuKdbl/mbf8vqtrbWDYFwJMiEvLm+0p/azInxk6LG2OSxg6pjDFJY4dU5phE5EHcOL/xfqyqv05FPWZws0MqY0zS2CGVMSZpLHCMMUljgWOMSRoLHGNM0ljgGGOS5v8DWg1c+m8vQKEAAAAASUVORK5CYII=\n",
- "text/plain": [
- ""
- ]
- },
- "metadata": {
- "needs_background": "light"
- },
- "output_type": "display_data"
- }
- ],
- "source": [
- "steps = np.arange(1, MAX_ESTIMATORS, 1)\n",
- "srfc = StreamingRFC(n_estimators_per_chunk=1,\n",
- " max_n_estimators=np.max(steps),\n",
- " **params)\n",
- "\n",
- "%time train_scores, test_scores = inc_partial_fit(x, y, srfc=srfc, steps=steps, sample=1)\n",
- "print(f\"With {len(srfc.estimators_)}: {train_scores[-1]} | {test_scores[-1]}\")\n",
- "plot_auc(steps, train_scores, test_scores)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "#### Partial random forest\n",
- "1 estimator per 10 % subset"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 16,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "CPU times: user 16.4 s, sys: 15.6 ms, total: 16.4 s\n",
- "Wall time: 16.7 s\n",
- "With 119: 0.6795318199557177 | 0.6549408766486443\n"
- ]
- },
- {
- "data": {
- "image/png": "iVBORw0KGgoAAAANSUhEUgAAASIAAAELCAYAAAB9HYtIAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAIABJREFUeJzt3Xl4XWW1+PHvyjwPTdIhnelcC22hFAoIFERaEKSiZRAF9Mr1p1zRKwg465V78V7ne5VBBERlUJB5aAu2gCJD6ZgOdB7StEmaNnNyknPO+v3x7rSn6UmbpGdKsj7Pkyfn7Omss0kX7/vuvd8lqooxxsRTUrwDMMYYS0TGmLizRGSMiTtLRMaYuLNEZIyJO0tExpi4s0RkjIk7S0TGmLizRGSMibuUeAcQC8XFxTpmzJh4h2HMgPP+++/vV9WS4203IBLRmDFjWL58ebzDMGbAEZGd3dnOumbGmLizRGSMiTtLRMaYuLNEZIyJO0tExpi4s0RkjIk7S0TGmLgbEPcRGWN6r7rBx7bqRgKq+PxBqut9lBZkcs6E4oh9hiUiY/qhNn+QXQeaWbHrILsPNJOZlszwgkxOGVHAsPwMUpKEnQea2V7dxO6DzeysaWb7/iZ2HWimusFHUBUBkkRo8PmPOv7l00v7TiISkXnAL4Fk4AFVvTvMNguB7wMKrFbVa0VkLvDzkM0mA1er6jMi8jBwHlDnrbtBVVdF71sYE33tgSCBoJKRmnxomaoSVEgS8AeVZl+AffWt1Da3ocCO/U2s2VNHa1uAVn+APbWtHGjy0dIW5ECTj+Ax6mKIQGjdjOy0ZMYUZzN1WB6DJ6WTLIICgaAyojCTiUNySU1OIi0licG56ZTkpkf0+0ctEYlIMvBr4CKgHHhPRJ5T1fUh20wA7gTOVtWDIjIYQFWXAjO8bQYBW4DFIYe/TVWfjFbsxkRLbXMbG/c1sLmyge37m2lp97PrQDPLdxzE5w+SlpJESU46WWnJVNS20NQWOObx8jJSyM9KJS05idKCTE4qziYjNZmS3HRGFmYyY2QB40pyaAsE2b6/ibXldVQ3+mhtDzCmKJuxJdmMLMyiOCcNEYnRWThaNFtEs4EtqroNQEQeBz4OrA/Z5gvAr1X1IICqVoU5zieBl1W1OYqxGhNx/kCQ6kYfzW0ByvbUcf8b21hXUX9ofWZqMjkZKRTnpHPN7FGU5KZT39JOdYOPRp+fs8cXU5CVSlAhNUnITEtmSF4Gg7LTABiWn8HY4uxuJZCMpGSmDMtjyrC8qH3fExHNRDQc2B3yvhw4o9M2EwFE5B+47tv3VfWVTttcDfys07K7ROS7wGvAHarqi1jUxnTBHwiydk8dlfWttLQHKMxyCWF/YxvrK+rZXNVAeyBIS1uA6gYflQ0+AiH9o3El2dw+bzJThuUyaWguQ/My4toKSSTxHqxOASYA5wMjgDdE5GRVrQUQkWHAycCikH3uBPYBacD9wO3ADzsfWERuAm4CGDVqVPS+gel3AkHlnW01vLaxirI9ddS3+inOSaNsTx0Hm9vD7pOeksSkoblkpCSTn5XGuME5lOZnMqwgg+y0FEpy05lzUhFJSZZ4wolmItoDjAx5P8JbFqoceEdV24HtIrIJl5je89YvBJ721gOgqnu9lz4ReQi4NdyHq+r9uETFrFmzrJyt6ZKqsrW6kVfK9rF850HWltdR09RGRmoSU4blUZqfQXWjj3MnlvDRqUMZU5xFZmoyB5vbABiUnc6IwkxSk+22vN6KZiJ6D5ggImNxCehq4NpO2zwDXAM8JCLFuK7atpD11+BaQIeIyDBV3SuuTXsFUBal+E0fp6rsPtDC+r31tAWCJIuQlpLEzpom/r5lP1X1Php8bkymtT0IwOShuZw3qYQLJw/hwimDj7iKZaInaolIVf0icjOuW5UMPKiq60Tkh8ByVX3OW/dREVkPBHBXw2oARGQMrkX1
eqdD/0lESgABVgFfjNZ3MH2TqrJ4fSU/fnkj2/Y3hd1mXEk2Y4uzyU7PYXBuOmOKs/nIlCEMycuIcbQGQFT7f69l1qxZajM09k+7DzRT09TGsPwM1pTX8beNlby5eT/lB1uYMDiHz541hlOG55Odnkwg6O7XGZSdRmlBZrxDHxBE5H1VnXW87eI9WG1Mj+w+0MxrGyrZvr+JVeV1rN5de8T63PQU5owr4pYLJ7Bg5nBSbNymT7BEZBJe2Z46nl9dwVtba1i7x91Qn5uewrjBOdwxfzInFWdTUdvC+MG5zB47iLQUSz59jSUiE3c7a5r4x5Ya9ta1MH1EASW56dQ0+dhV08w/ttawZH0lqcnCzJGFfGPeJC47pZSRg7LiHbaJIEtEJmaCQWXjvgbaAu5ZqA17G3ilbN+hVk44BVmpfPUjE7jx7LHkZ6bGMFoTS5aITNSpKmvK6/jec+tY1WlMZ9rwPL596RTmTh5MaX4ma8praWj1U5idxshBmZTkpNvdxwOAJSITMXXN7azcfZB1FfXsPtDMvvpW9jf62LG/mUafn6LsNP7jimmMKMgkLzOF8YNzj2rlnHFSUZyiN/Fkicj0yqbKBt7YVM36Cnez4I6aJtZV1B+aWqI4J51h+RmU5KRz6qhCpgzL45Jpw8jPsu6VOZolItMjB5rauOvFDTy1ohxwT4BnetNOfPXCiZw+tpBTRhSQk25/Wqb77K/FdEuTz8+Df9/O/W9so6U9wJfOH8dn5oxmWL7dGGhOnCUiE1YwqFQ1+EgS2FHTzK1/Wc2uA81cNHUI37h4EhOG5MY7RNOPWCIyh/gDQZasr+TRd3exfMdBWtoPzw44ojCTJ2460waTTVRYIjKoKs+uquAXr25iR00zpfkZXHX6SMaVZIMIqsqCmcPJzbCBZhMdlogGIH8gyGPv7mJTZSPnTizh0Xd2svSDaiYPzeXe607joqlDSLYJvEwMWSIaYNaU1/KNJ9ewcV8DaSlJ/OHtnaSnJPH9y6by2TljbAZBExeWiAYIVeW+N7bxk0UfUJyTzr3Xncb5k0p4e1sNY4qyGVOcHe8QzQBmiWgA8AeCfPPptfx5eTmXnDyU/1xwMgXexO/nTxoc5+hMn+Nvg7ZGyBoUsUNaIurn2gNBbn50BYvWVfKVC8bztYsm2rNbpucCfqhYCeuehjWPw5TL4LJfRuzwloj6qdrmNnz+IHe9uIFF6yr53mVTufHssfEOyyS6gB9qd8L+za7Vk5wKmxfD+ufBVwdJqTBpPnxoQUQ/NiFLTnvLA8Bab7Ndqnq5t3ws8DhQBLwPfEZV26L5PRKVzx9gb20rzW0BJg/NRQTe23GQ+9/YyqsbDteqvGP+ZEtCBoJBqN4IgTZo2g/vPwQ1W2HQWPC3wsEdULsLgp1q3aflwJTLYcJFcNL5Ee2SdUjIktOeFlWdEebQPwZ+rqqPi8i9wOeBe6L1PRJV2Z46Pv3AO9S1uEpLo4uyyElPYV1FPYVZqXx57jhKCzIpLchkro0DDUyNVfDe72DHm67Q/f4PoLnm8PqsIhhxOhzYDqkZMPQUmHoFDDoJSiZBeh60N0HJZEiL7sWMvlBy+hCvhNAFHC5L9Htca2rAJKJgUNlS3cgND71LTnoK3/nYVIJB5akV5TT6/Ny1YBqfmDmCzDQrg9Nv1VfA5iXga3DJBIWWWti3Fmq2QGOle+/zJpwbPgtSM2HCR2HseZCRB0kp7nVqYlQtSeSS0xkishzwA3er6jO47litqvpDjjk8SvEnlEBQ+a+XNvDwWzvwB5WCrFQe/9xsxg/OAWDh6SOPcwST0BqrXKsld4h7rwp15S6x1O2G1npoqoKtf3MJJ5ycIa4lM/IM133KKoIPfQKKx8fue/RSvAerj1VyerSq7hGRk4C/ichaoOs5RTvpTyWnm9v83PL4Kpasr2TBzOFMH5HPeZMGM9bu/enbggFY82dY/iCUv+uWZQ5yrRdf4+EWTQdJdknmI9+HifMhb5gb
6xGBtFzIKYn1N4iYhC05rap7AFR1m4gsA2YCTwEFIpLitYrCHRNvv35Rcnr3gWa+8MhyNlU28P3LpnKDDTr3bb4G2LoUyt+DTYvcuE3JFJj7bUjPgeoPINDuukwlk91PwSjILITULEju9E82Iz8+3yPCErLktIgUAs2q6vOWnw38t6qqiCwFPom7cnY98GwUv0PcVNW3cu/r23j8vV2kJAkP3Tib8yb23f/jDVgHd0DdHqjeAB+8Attfd1etktNg2HT41O9h6sddq2YAS8iS0yJyFnCfiASBJNwYUccg9+3A4yLyI2Al8LtofYd4KdtTx40Pv8fBpjYum17KVz8ygdFF1g2Li31lsHc1aMB1jTTgljVVuUQy5sMw/LTDiUQV9q2BTYth3V+hKuTaTOFYOP0LMPlSd7UqJS0+3ykBWcnpBLNy10Gue+AdCrLSeOjG05loE5D1TtUGeO0/XHfn/DvcJeme2LYMlv0Ydr119LrUbMgucvfcAOQNd12opBTYs/zwJfKRZ8C0K6F4outeDTppwLV8rOR0H9TaHuDrf1lNQVYaf/3SWQzJS4xLqwlBFcqecuMrdbtg7Lkw+WNuUDfQBknJsOF52PQKIHBwu7sRz++Dsr/C9KvgzC/D4CmHk0HNVncVauLFkDMUVj8KFavcXcU7/w75I+GjP4JJl7iulAYBdcuTkqGpBrYsgQ9ecle42ltg4jwYfba7+S/H7t/qLmsRJZD/WbSRXy/dyiOfm825Nh50WFsTPP9VWPtnyCqG3GFQGeYSdlIKjLvA3XyXWwof/rpLUm/+FFb+wd09nD0Yisa7u4fL3wPUPbaQXQwNe90l75whcMpVcMYXE+Y+m77KWkR9zJ7aFu57fRtXnjrCklAHX6O7tP3W/0JTtbuy9OGvQ1KSa82Uv+cud6ekQXsrlM48fB9OqEt/4rpn6591+9S5CiR8+Ovu4c2Vf4AD2+Dj/wfjLhxw3adEYIkoQTy9ohx/UPnqRybEO5T4aKiETS+78ZWOaSZWPQotB+CkuXD+nTAq5H7YonHup7uyi+H0z7ufzkrDPUlkYskSUQJQVZ58v5wzTxrEyEFZ8Q4n8oJBN47S3uIGjwNtrlVSscoliJwh8ObPjr6Bb8JH4bzbYcRxW/amj7NElADe33mQHTXN3HxBP2wNNeyDp78I25YevS631LV4/K1ugHf+f7tWTkqGG5xOSop9vCYuLBElgL8sLycrLZn504bGO5TICQZh5SOw5HvuytWlP3P33LQ1Qko6ZJe4q0rtrW7+m6IJRyYeG6cZUCwRxdm6ijqeWlHOp2aNJLsvlWluPuCuTgUDbmxn+5vu3p3i8TBqDrxzn7uxb/TZ8LFfQMnE8MdJzXAPapoBrQ/95fc/bf4gX//zagqz07h9XoL+Y9y7xl25qixzV7GKJ7h7dPatBUlyl74DPkjPh8GTYd0zsPKPUDAaFtwPpyy01o05LktEcaKq/OjF9Wzc18Dvrp91aDL7hFG1AZbdDeufcTcGls503anKMvf7gu+4QWdfg5s6dPTZ7ia/tmbXEio91R5hMN1miShOfrp4E4/8cydf+PBYLpwS5t6XWFKFDc9B5Tr3kOaBbVC+3HW9zr0N5twMmQXdO1ZaFow6M6rhmv7HElGMba5s4EcvbuD1TdVcM3sk37xkSrxDgrd/A4u+6bpauaVuDuNzb4UzvxSV+YmN6cwSUQxV1rdyxa//QVKS8O1Lp3Dj2WPjX9pnzwp3ZWvSJW5KCutOmTiwRBRDv3ptMz5/kCX/fl78Z1dUhbV/cS2hnMHw8V9bEjJxY4koRrbvb+Lx93Zz7exR8U1C/jZ47wFY8XtXWqZ0pktC1gUzcWSJKEZ++eom0pKT+LcL4ziRuSo8f4ub7mL4LLjiXveUud3BbOLMElEMVNW38sKavXxmzmgG58ZxWom373FJ6LzbYe434xeHMZ1YIoqBx97djT+ofHbOmMgffP9m2P2um84iI+/IdW1Nrl55WxOs
eAQ2vuAmEzvvjsjHYcwJSMiS0yIyA1c0MQ83l/VdqvqEt/3DwHkcLi10g6quiub3OBHtgSCPvruTcyeWRHZsqGm/62ZtfMG9X/JdGHMO7Pqnm9xr2HT3xHurd5rScuGCb8Ocf7OumEk4iVpyuhn4rKpuFpFS4H0RWeTVOwM3yf6T0Yo9khavq6Sy3sddV4yO3EH3rYXHrnUTuJ93O4w+C974Cex62z1Y2lQFG16A8RfCjE+7UjRF42xA2iSshCw5raqbOjZQ1QoRqQJKgFr6EFXlN8u2MLY4m7mTIzR/ccUq+P3l7q7nG1+G4ae65SedH5njGxMH0Wyjhys53bk89ERgooj8Q0Te9rpyRxCR2UAasDVk8V0iskZEfi4i6ZEOPFKWfVDNuop6/t/540hOisCNi1Ub4Q8L3FjQ5xcdTkLG9HHxHiwILTl9DfBbETn0UJOIDAP+ANyoqkFv8Z3AZOB0YBCuztlRROQmEVkuIsurq6uj9w260Nzm51d/28zwgkwWzOycf3tpkXel6/rnXHkaY/qJaCai7pacfk5V21V1O9BRchoRyQNeBL6lqm937KCqe9XxAQ/huoBHUdX7VXWWqs4qKYntZPQ/XfwBM3+4hJW7avny3PGkJkfgNFeshK2vwVn/1vMaXcYkuGgmokMlp0UkDVdy+rlO2zyDaw3RqeR0GvA08EjnQWmvlYS4h7SuAMqi+B16zOcPcN/r2zhtdCGPfuEMrpk98vg7HUtLrZsH6O8/h/S88JO/G9PHJWrJ6euAc4EiEbnBO2THZfo/iUgJIMAq4IvR+g69sWFvA22BIJ85czRnjSvu/YHKl7t6XJsWuTLH4MrfZORHJlBjEkhU7yNS1ZeAlzot+27IawX+3fsJ3eaPwB+7OOYFkY80clbvdhf2po/s5vw94WxeAk9c51pAc77s5gJqqnHdMmP6IbuzOsJW7a5lcG46w/J7+SjHttfhsWtcaeTPPONqrBvTz1kiirDVu2uZPrKgd/MMtRx0pXcGjYXrn+/+rIjG9HHxvnzfr9Q1t7NtfxMzetste/l2aKyEBfdZEjIDiiWiCFpd7saHepWINjwPa55wc0TbjYpmgLFEFEFvbq5GBE4e0cMrW0374fmvugdVz701OsEZk8BsjCgCVJWfLt7Eb9/czrwPDSUvI7VnB3jpVvDVwxXPQ3IP9zWmH7AWUQQ8tWIP/7d0C1fNGsn/XjuzZzvv+Duse9p1yYZMjU6AxiQ4axGdoEBQ+c3SLUwdlsfdV57cs6tlwSAs+hbkDXe1w4wZoKxFdIJeLtvLtv1NfHnu+J5fsl/1J9i7Ci78nitMaMwAZYnoBKgqv166lZNKspk3bWjPdt5XBi9/A0bNgZM/FZ0AjekjLBGdgNc3VbNhbz3/77wezjfUWnf4EY5PPWxTt5oBz8aITsC9r29lWH4GH5/Rw/mG3viJqzH/uVcgt4ctKWP6IftfcS+t2l3L29sO8PlzxpKW0oPTWLsb3rkPpl8No86MXoDG9CGWiHrpoX9sJy8jhatn92CmxPYWV20DYO63ohOYMX2Qdc16aeWuWj48sYSc9G6ewvd+B69+3924+OGvQ8EJTphmTD9iLaJeaG0PsPtgM+NLcrq3Q8tBWPwdGDzVPVV/wXeiG6AxfYy1iHphR00TqjBucDcT0fIHob0JLv0JDD05usEZ0wdZi6gXtlQ1AnSvReT3ucHpcRdYEjKmC1FNRCIyT0Q+EJEtIhK24LqILBSR9SKyTkQeDVl+vYhs9n6uD1l+mois9Y75K+nVDGQnZmtVEyJwUkk3Skiv/YubY+jsW6IfmDF9VEKWnBaRQcD3gFmA4kpOP+dVhL0HVyH2Hdx82POAl6P1PcLZWt3IiMJMMlKTj7/xe7+Dkikw9rzoB2ZMHxXNFtGhktOq2gZ0lJwOFbbkNHAxsERVD3jrlgDzvFJCear6tjfx/iO4kkIxtaWqkXHd6ZZVrISKFTDrcxD7hpsxfUailpzuat/h
3utjHTOqgkFl2/5uJqLlD0JqFky/KvqBGdOHxfuqWWjJ6RHAGyISkRFdEbkJuAlg1KjIlWfeU9tCa3uQ8ce7Yla/F9Y+CdOutFpkxhxHopac7mrfPd7rYx0TiF7J6a3V7orZMVtEvgZ4dCEgVovMmG5IyJLTHK4AWygihcBHgUWquheoF5EzvatlnwWejeJ3OErHpftxx7pi9uzNULkOFv4eSibFKDJj+q6ELDkNICL/gUtmAD9U1QPe6y8BDwOZuKtlMb1itmp3LaX5GRTlpIffwN8GH7wEs78AEy6KZWjG9FkJWXLaW/cg8GCY5cuBaREPtptW7qpl5ujCrjeoWg+BNhg5O3ZBGdPH2Z3VPVBZ38qe2hZOHXWMRLR3tfs9bEZsgjKmH+hWIvLGZHJD3ueJyBnRCysxrdh5EIBTRx2jgOLeVW7mxcKxMYrKmL6vuy2ie4DGkPeN3rIBZcWug6SlJPGh0mNcjq9Y5Qol2vSvxnRbd/+1iDeeA4CqBon/PUgxt2JXLScPz+96RsZAu7taNmx6bAMzpo/rbiLaJiJfEZFU7+cW3GX2AaPNH2Ttnrpjd8uqN0LAB6U9LLJozADX3UT0ReAs3M2D5cAZeHctDxQrdx2kzR/ktGNdMatY5X5bi8iYHulW98p7GPXqKMeS0F4u20daShLnTDjGXdpbX3MD1YPGxS4wY/qBbiUiEXkINx3HEVT1cxGPKAEFg8qidfs471hzVJcvdzXsP/x1G6g2poe6O+D8QsjrDGABUBH5cBLT6vJa9ta1ctvFXTyuoQqv3Ak5Q+Ccr8U2OGP6ge52zZ4KfS8ijwF/j0pECejlsn2kJgsXThkSfoMtr0L5u3D5/0J6bvhtjDFd6m0fYgIwOJKBJLLF6/Zx1rhi8jNTw2+w4XlIy4VTBvQwmjG91t0xogYOjxEpUAl8I1pBJZKaRh87apq5pqtCiqqwaRGMvwBS0mIbnDH9RHe7ZrnePNITcGNEEGbwuj9aU14HwPSRXdw/tHc1NO6DifPCrzfGHFd3W0T/AtyCm4hsFXAm8E/gguiFlhhWl9ciAtOGd/FYx6ZXAIHxNuWHMb3V3TGiW4DTgZ2qOheYCdRGLaoEsra8jvElOV1ftt/0Cow4HXIiNwukMQNNdxNRq6q2AohIuqpuBPr91IOqyuryOk4Z0UW3rK7cVeqYZN0yY05Ed+8jKheRAtzUrktE5CCwM3phJYa9da3sb/QxfWQX3bJ1z7jfU2Ne0ciYfqW7g9ULvJffF5GlQD7wStSiShBryl3vs8sW0bqnYegpUGSPdBhzInp8H5Gqvq6qz3lFE4/peCWnReQGEakWkVXez794y+eGLFslIq0icoW37mER2R6yLmpTIa4uryM1WZgyLMxNigd3wp7lMO0T0fp4YwaMuJac9jyhqjeHLlDVpcAM7ziDgC3A4pBNblPVJ6MVe4eyPXVMHJJLekqY0tLrrVtmTKTEu+R0d3wSeFlVmyMa3XGoKusr6vlQaV74DTYtdtN9DLIpYY05UfEuOQ1wpYisEZEnRWRkmPVXA491WnaXt8/PRSRsXR8RuUlElovI8urq6h4HX9Xgo6apjanDukhE+ze58SFjzAmL93wVzwNjVPUUYAnw+9CVIjIMOBlX/6zDncBk3H1Ng4Dbwx34RCu9rq+oB2BquPmpW+ugqQqKxvf4uMaYo8W15LSq1qiqz3v7AHBap2MsBJ5W1faQffaq4wMewnUBI279XpeIwg5U12x1vy0RGRMRcS057bV4OlwObOh0jGvo1C3r2McrOX0FUBbhuAFYV1HH6KIscjPCPHFviciYiIp3yemviMjlgB84ANzQsb+IjMG1qF7vdOg/iUgJILjn3r4YjfjXV9R3PT5Us9l9vA1UGxMR8S45fSduzCfcvjsIM7itqlF/0LbR52dHTTNXnjoi/AY1W6BgFKSEHSc3xvRQvAerE9LGvR0D1V21iLZYt8yYCLJEFMa2/U0AjB+cc/RKVTdG
ZInImIixRBRGdYO7kDckL+PolY2V0NYIxRNiHJUx/ZclojCqG3zkZqSQkRrm0Y6aLe63PehqTMRYIgqjusFHSW4XA9GV3qNy1jUzJmIsEYVR3eBjcLhEVL8XXr8bhkyDvC6uqBljeswSURhVDa2U5HYaHwoG4Kl/gfYW+OSDVs3VmAiK6n1EfVV1g4+SnE4tos1LYOffXRHFkn4/S64xMWX/W++kyeenqS3A4LxOiWjNE5A5CKZfE5/AjOnHLBF10nHp/ogWUWs9fPASTLsSkruo9mqM6TVLRJ1UN7pEdESLaOML4G+FU66KU1TG9G+WiDqpqvdaRKFXzdY8AYVjYcSsOEVlTP9miaiT6oZWIKRrFmiHnW/B5EtBJI6RGdN/WSLqpKrBR0qSUJiV5hZUb4RAG5TOjG9gxvRjlog6qW7wUZyTTlKS1/rZu8b9tvmpjYkaS0SdVDX4jhyo3rcGUrPs2TJjosgSUSdH3cy4d417pCMpzAOwxpiIsETUSXVjyAOvwSDsWwvDrFtmTDRFNRH1tuS0ty4Qsvy5kOVjReQd75hPeBPzR0QgqNQ0hjzwenA7tDXY+JAxURa1RBRScno+MBW4RkSmhtn0CVWd4f08ELK8JWT55SHLfwz8XFXHAweBz0cq5rqWdoIKhdlebtvnDVRbi8iYqOoLJacP8UoIXQB01L3/Pa6kUEQ0tvoByEn3ngXeuwaSUqBkSqQ+whgTRiKXnM7wSka/LSIdyaYIqFVV/3GO2auS040+d9jcDC8R7VsLxZMgNcyUscaYiIn3YPWxSk6PVtVZwLXAL0SkR9fPe1NyuqnNJaLsjhZRZRkMndaTjzXG9ELClpxW1T3e723AMmAmUAMUiEjHPEpHHfNEdLSIstNToKkGGva6S/fGmKhKyJLTIlIoIune62LgbGC9qiqwFPikt8/1wLORCviIMaLKtW6htYiMibpELTk9BbhPRIK4ZHm3qnqz1nM78LiI/AhYCfwuUjE3+UIS0dYyt3DIyZE6vDGmCwlZclpV3wLCZgCvqzY7spE6R3TNKssgZwjkdG98yRjTe/EerE4oTb4AANlpybCvzMaHjIkRS0Qhmtr8ZKQmkaJ+N/2HjQ8ZExOWiEI0tPrJSU+F/ZuMINyxAAAPSklEQVQg2G7jQ8bEiCWiEE0+PznpybD/A7dgSLgnUowxkWaJKESTz+8Gqhu9O7Fzhx17B2NMRFgiCtHYkYiaqkGSIaMg3iEZMyBYIgrR1OZ39xA1VUN2sZWVNiZG7F9aiMbWjkS0H7Lt/iFjYsUSUYhGX8B1zZr3uxaRMSYmLBGFOHTVrKkasiwRGRMrlog8gaDS0u61iKxrZkxMWSLydDxnlp8SAF+9dc2MiSFLRJ6OJ++LpMEtsBaRMTFjicjTkYgKqHMLrEVkTMxYIvIc6poFOxKRtYiMiRVLRJ6OKUByAgfdAmsRGRMzlog8jb52AHLaOxKRtYiMiZWErPQqIjNE5J8iss4rNXRVyD4Pi8j2kH1mRCLWRq9FlNl+EJLTIS0nEoc1xnRD1KaKDan0ehGu/th7IvJcyNzTHZ5Q1Zs7LWsGPquqm0WkFHhfRBapaq23/jZVfZII6hisTvMdcK0hkUge3hhzDAlZ6VVVN6nqZu91BVAFRLWv1DFYndpaY+NDxsRYIld6BUBEZgNpwNaQxXd5+/y8o+zQiWry+UlNFpJa7K5qY2It3oPVx6r02lH37A/Ajaoa9BbfCUwGTgcG4coLHaWnJac75iKSJnvg1ZhYS9hKryKSB7wIfEtV3w7ZZ686PuAhuigt1NOS040+P9mpyYfnIjLGxEyiVnpNA54GHuk8KN2xj4gIcAVQFolgm3x+itP94G+1J++NibFErfS6EDgXKBKRjmU3qOoq4E8iUgIIsAr4YiTibfIFGJza4t5k2hSxxsRSolZ6/SPwxy6OeUGEwwSgwednZKrXS0zPi8ZHmAGovb2d8vJyWltb4x1KVGVkZDBixAhSU1N7
tX9UE1Ff8p8LppG5r8V1HjMsEZnIKC8vJzc3lzFjxiD99N40VaWmpoby8nLGjh3bq2PE+6pZwvhQaT4n5XgX5tLz4xuM6TdaW1spKirqt0kIQEQoKio6oVafJaJQvnr321pEJoL6cxLqcKLf0RJRqFZvChAbIzL9RG1tLb/5zW96vN8ll1xCbW3t8TeMEEtEoaxFZPqZrhKR3+8/5n4vvfQSBQWxu3psg9WhWutdhdfUrHhHYkxE3HHHHWzdupUZM2aQmppKRkYGhYWFbNy4kU2bNnHFFVewe/duWltbueWWW7jpppsAGDNmDMuXL6exsZH58+dzzjnn8NZbbzF8+HCeffZZMjMzIxqnJaJQvnrXGhoAfXoTez94fh3rK+ojesyppXl877IPdbn+7rvvpqysjFWrVrFs2TIuvfRSysrKDl3devDBBxk0aBAtLS2cfvrpXHnllRQVFR1xjM2bN/PYY4/x29/+loULF/LUU09x3XXXRfR7WCIK1Vpv40OmX5s9e/YRl9h/9atf8fTTTwOwe/duNm/efFQiGjt2LDNmuGm/TjvtNHbs2BHxuCwRhepoERkTBcdqucRKdnb2odfLli3j1Vdf5Z///CdZWVmcf/75YS/Bp6cfnuAiOTmZlpaWiMdlg9WhWuvtHiLTr+Tm5tLQ0BB2XV1dHYWFhWRlZbFx40befvvtsNvFgrWIQvnqoWBUvKMwJmKKioo4++yzmTZtGpmZmQwZMuTQunnz5nHvvfcyZcoUJk2axJlnnhm3OC0RhbIxItMPPfroo2GXp6en8/LLL4dd1zEOVFxcTFnZ4Qkubr311ojHB9Y1O5KvzsaIjIkDS0QdVMHXYC0iY+LAElGHtkbQoLWIjIkDS0QdWr0bzaxFZEzMWSLqYM+ZGRM3log6HGoR2X1ExsRaQpac9tZdLyKbvZ/rQ5afJiJrvWP+SiI12Yu1iEw/1NtpQAB+8Ytf0NzcHOGIwotaIgopOT0fmApcIyJTw2z6hKrO8H4e8PYdBHwPOANXLuh7IlLobX8P8AVggvczLyIB21xEph/qK4komjc0Hio5DSAiHSWn13dj34uBJap6wNt3CTBPRJYBeR11zkTkEVxJofB3ZfWEtYhMPxQ6DchFF13E4MGD+fOf/4zP52PBggX84Ac/oKmpiYULF1JeXk4gEOA73/kOlZWVVFRUMHfuXIqLi1m6dGlU44xmIgpXcvqMMNtdKSLnApuAr6nq7i72He79lIdZfuLsqpmJtpfvgH1rI3vMoSfD/Lu7XB06DcjixYt58skneffdd1FVLr/8ct544w2qq6spLS3lxRdfBNwzaPn5+fzsZz9j6dKlFBdHv85fvAerj1ly+kT0tOQ0Pm9StLTs429rTB+0ePFiFi9ezMyZMzn11FPZuHEjmzdv5uSTT2bJkiXcfvvtvPnmm+Tnx/6CTTRbRN0qOR3y9gHgv0P2Pb/Tvsu85SOOdcyQY98P3A8wa9YsPW60rfWQnmuTopnoOUbLJRZUlTvvvJN//dd/PWrdihUreOmll/j2t7/NhRdeyHe/+90wR4iehCw5jasO+1ERKfQGqT8KLFLVvUC9iJzpXS37LPBsRKK1uYhMPxQ6DcjFF1/Mgw8+SGNjIwB79uyhqqqKiooKsrKyuO6667jttttYsWLFUftGW0KWnFbVAyLyH7hkBvDDjoFr4EvAw0AmbpD6xAeqweYiMv1S6DQg8+fP59prr2XOnDkA5OTk8Mc//pEtW7Zw2223kZSURGpqKvfccw8AN910E/PmzaO0tDTqg9WievxeS183a9YsXb58+bE3eugS9/vGl469nTE9sGHDBqZMmRLvMGIi3HcVkfdVddbx9rX5iDoUT7SBamPixBJRh8t+Ee8IjBmw4n353hhjLBEZE20DYRz2RL+jJSJjoigjI4Oampp+nYxUlZqaGjIyMnp9DBsjMiaKRowYQXl5Od26u78Py8jIYMSIEcffsAuWiIyJotTU1CMqq5rw
rGtmjIk7S0TGmLizRGSMibsB8YiHiFQDO4+xSTGwP0bhHE+ixGJxHC1RYkmUOOD4sYxW1ZLjHWRAJKLjEZHl3XkeJhYSJRaL42iJEkuixAGRi8W6ZsaYuLNEZIyJO0tEzv3xDiBEosRicRwtUWJJlDggQrHYGJExJu6sRWSMibsBn4iOV402ip87UkSWish6EVknIrd4yweJyBKvwu2SkMKS0Y4nWURWisgL3vuxIvKOd16e8OYdj0UcBSLypIhsFJENIjInHudERL7m/XcpE5HHRCQjVudERB4UkSoRKQtZFvYciPMrL6Y1InJqlOP4H++/zRoReVpECkLW3enF8YGIXNyTzxrQiagH1WijwQ98XVWnAmcCX/Y++w7gNVWdALzmvY+FWzhcvADgx8DPVXU8cBD4fIzi+CXwiqpOBqZ7McX0nIjIcOArwCxVnYabc/1qYndOHuboCsZdnYP5HK56fBOuEnI041gCTPNKgG0C7gTw/navBj7k7fMb799X96jqgP0B5uCqg3S8vxO4M06xPAtcBHwADPOWDQM+iMFnj8D9cV8AvAAI7ia1lHDnKYpx5APb8cYuQ5bH9JxwuMDnINyD4S/gqg/H7JwAY4Cy450D4D7gmnDbRSOOTusWAH/yXh/xbwdXNGNOdz9nQLeI6LqibEyJyBhgJvAOMERd2SSAfcCQGITwC+AbQNB7XwTUqqrfex+r8zIWqAYe8rqJD4hINjE+J6q6B/gJsAvYC9QB7xOfc9Khq3MQz7/hz3G4is4JxTHQE1HciUgO8BTwVVWtD12n7n8tUb2sKSIfA6pU9f1ofk43pQCnAveo6kygiU7dsBidk0Lg47jEWApkc3QXJW5icQ6OR0S+hRte+FMkjjfQE9Fxq9FGk4ik4pLQn1T1r97iyo7Ck97vqiiHcTZwuYjsAB7Hdc9+CRSISMd8VbE6L+VAuaq+471/EpeYYn1OPgJsV9VqVW0H/oo7T/E4Jx26Ogcx/xsWkRuAjwGf9pLiCccx0BPRcavRRotXqfZ3wAZV/VnIqueA673X1xOpSrZdUNU7VXWEqo7Bff+/qeqngaXAJ2MVhxfLPmC3iEzyFl0IrCfG5wTXJTtTRLK8/04dccT8nITo6hw8B3zWu3p2JlAX0oWLOBGZh+vGX66qzZ3iu1pE0kVkLG7w/N1uHziag3594Qe4BDf6vxX4Vgw/9xxc83oNsMr7uQQ3PvMasBl4FRgUw5jOB17wXp/k/SFtAf4CpMcohhnAcu+8PAMUxuOcAD8ANgJlwB+A9FidE+Ax3NhUO66V+PmuzgHuwsKvvb/ftbgrfdGMYwtuLKjjb/bekO2/5cXxATC/J59ld1YbY+JuoHfNjDEJwBKRMSbuLBEZY+LOEpExJu4sERlj4s4SkTEm7iwRmbgRkRkicknI+8sjNRWLiHxVRLIicSwTfXYfkYkb71GBWap6cxSOvcM7drfL7ohIsqoGIh2LOT5rEZnjEpEx3iRlv/UmC1ssIpldbDtORF4RkfdF5E0Rmewt/5Q3ydhqEXnDe6Tmh8BVIrJKRK4SkRtE5P+87R8WkXtE5G0R2SYi53sTdW0QkYdDPu8eEVnuxfUDb9lXcA+rLhWRpd6ya0RkrRfDj0P2bxSRn4rIamCOiNwtbrK6NSLyk+icUXOUWNy2bz99+wc3J40fmOG9/zNwXRfbvgZM8F6fgXt2DdzjB8O91wXe7xuA/wvZ99B73KRcj+MeYfg4UA+cjPuf5/shsXQ86pAMLANO8d7vAIq916W458dKcE/4/w24wlunwELvdRHu8QQJjdN+ov9jLSLTXdtVdZX3+n1ccjqCN6XJWcBfRGQVbtKuYd7qfwAPi8gXcEmjO55XlxHWApWqulZVg8C6kM9fKCIrgJW42QHDzbB5OrBM3dP0HVNXnOutC+BmQAA371Ar8DsR+QTQfNSRTFSkHH8TYwDwhbwOAOG6Zkm4ycNmdF6hql8UkTOAS4H3ReS0HnxmsNPn
B4EU7ynvW4HTVfWg12XL6MZxQ7WqNy6kqn4RmY172v6TwM24aVFMlFmLyESMuondtovIp+DQxO7TvdfjVPUdVf0ubhbGkUADkHsCH5mHmzytTkSG4OZv7hB67HeB80Sk2JtH+Rrg9c4H81p0+ar6EvA13JzZJgasRWQi7dPAPSLybSAVN86zGvgfEZmAG/N5zVu2C7jD68b9V08/SFVXi8hK3HQdu3Hdvw73A6+ISIWqzvVuC1jqff6LqhpuLqFc4FkRyfC2+/eexmR6xy7fG2Pizrpmxpi4s66Z6RUR+TVuHudQv1TVh+IRj+nbrGtmjIk765oZY+LOEpExJu4sERlj4s4SkTEm7iwRGWPi7v8DP5YL5DaT/SYAAAAASUVORK5CYII=\n",
- "text/plain": [
- ""
- ]
- },
- "metadata": {
- "needs_background": "light"
- },
- "output_type": "display_data"
- }
- ],
- "source": [
- "steps = np.arange(1, MAX_ESTIMATORS, 1)\n",
- "srfc = StreamingRFC(n_estimators=1,\n",
- " max_n_estimators=np.max(steps),\n",
- " **params)\n",
- "\n",
- "%time train_scores, test_scores = inc_partial_fit(x, y, srfc=srfc, steps=steps, sample=0.1)\n",
- "print(f\"With {len(srfc.estimators_)}: {train_scores[-1]} | {test_scores[-1]}\")\n",
- "plot_auc(steps, train_scores, test_scores)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "#### Partial random forest\n",
- "3 estimators per 10 % subset, but /3 fewer steps"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 17,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "CPU times: user 2.19 s, sys: 0 ns, total: 2.19 s\n",
- "Wall time: 2.2 s\n",
- "With 40: 0.6577470048702592 | 0.6314094564082084\n"
- ]
- },
- {
- "data": {
- "image/png": "iVBORw0KGgoAAAANSUhEUgAAARwAAAELCAYAAAALJznDAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAIABJREFUeJzt3Xl4VOX5//H3nYUsEEhI2JIACRg2AdmFIhUXFJSiVqUu9Ku1Fbu49FvbKr9Wa+23rW1t61KXumBr3XcRkUUFtSrKvoV9S8KWkA2yZzL3749zgmMgEjBnJsncr+uaK3OWmbnnkHw45znnPI+oKsYYEwwRoS7AGBM+LHCMMUFjgWOMCRoLHGNM0FjgGGOCxgLHGBM0FjjGmKCxwDHGBI0FjjEmaKJCXUBzSUlJ0YyMjFCXYUxYWrFixUFV7XK89dpM4GRkZLB8+fJQl2FMWBKR3U1Zz9NDKhGZLCKbRWSbiNzeyDrTRSRbRDaIyHMB83uJyEIR2eguz/CyVmOM9zzbwxGRSOAhYBKQBywTkTmqmh2wThYwCxivqsUi0jXgLZ4Gfq+qi0SkA+D3qlZjTHB4uYczBtimqjtUtQZ4AbiowTrXAw+pajGAquYDiMggIEpVF7nzy1S1wsNajTFB4GUbThqQGzCdB5zeYJ1+ACLyMRAJ3KWq8935JSLyGpAJvAvcrqp1J1JAbW0teXl5VFVVneRXaD1iY2NJT08nOjo61KUY06hQNxpHAVnARCAd+FBEhrjzJwDDgRzgReBa4MnAF4vITGAmQK9evY5687y8PBISEsjIyEBEPPsSoaaqFBYWkpeXR2ZmZqjLMaZRXh5S7QF6Bkynu/MC5QFzVLVWVXcCW3ACKA9Y7R6O+YA3gBENP0BVH1PVUao6qkuXo8/IVVVVkZyc3KbDBkBESE5ODos9OdO6eRk4y4AsEckUkXbAFcCcBuu8gbN3g4ik4BxK7XBfmygi9SlyNpDNSWjrYVMvXL6nad08O6RSVZ+I3AgswGmfma2qG0TkbmC5qs5xl50nItlAHfALVS0EEJGfA++J85e0Anjcq1qNCVeqyqFKHwfLqymr8lFe7eNwtfOzrNrHeYO6071TbLN9nqdtOKo6D5jXYN6dAc8V+Jn7aPjaRcBQL+sLhpKSEp577jl+/OMfn9DrLrjgAp577jkSExM9qsyEm6raOl5enssn2wvJP1xN/uEq8g9VU+1r/IqTPikdWk/gGCdwHn744aMCx+fzERXV+OafN29eo8uMORGVNXU893kO//xgO/mHq+mdHE9qpzhG9kqia8dYuibEkNIhhoTYKNrHRNGh/hEbRWJc8571tMDx2O2338727dsZNmwY0dHRxMbGkpSUxKZNm9iyZQsXX3wxubm5VFVVccsttzBz5kzgi1s1ysrKmDJlCmeccQaffPIJaWlpvPnmm8TFxYX4m5mWrrzaxzNLd/P4Rzs4WFbD2D6due+KYYzrE7oTKWETOL99awPZew8163sOSu3Ib7516leuc88997B+/XpWr17NkiVLuPDCC1m/fv2R09ezZ8+mc+fOVFZWMnr0aC699FKSk5O/9B5bt27l+eef5/HHH2f69Om8+uqrzJgxo1m/i2kbVJWVOcW8siKPuWv2cbjax4SsFG46O4sxmZ1DXV74BE5LMWbMmC9dK/PAAw/w+uuvA5Cbm8vWrVuPCpzMzEyGDRsGwMiRI9m1a1fQ6jUtn6qyt7SKN1bt4dUVeew4WE5cdCQXDOnB1WN7MaJXUqhLPCJsAud4eyLB0r59+yPPlyxZwrvvvsunn35KfHw8EydOPOa1NDExMUeeR0ZGUllZGZRaTctzqKqWHQXlbN5/iE37D7Np32E2HzhMUXkNAGMyOvPDiX25YEgPOsS0vD/vlldRG5OQkMDhw4ePuay0tJSkpCTi4+PZtGkTS5cuDXJ1piXbW1LJ22v3sb2gjB0Hy9lR
UM7Bsuojy+OiI+nXPYFJA7vRv3sCZw/oSkZK+694x9CzwPFYcnIy48ePZ/DgwcTFxdGtW7cjyyZPnsyjjz7KwIED6d+/P2PHjg1hpaalqKyp49EPtvPPD7dTVesnuX07+nRpz9kDutCnSwcyU9rTv1sCvTrHExHRui74lLYytvioUaO0YQdcGzduZODAgSGqKPjC7fu2NarKnDV7ueedTewrrWLq0B7cNnkAPTvHh7q04xKRFao66njr2R6OMSF0uKqWXQcr2HGwjKc/3c2K3cWcmtqR+68Y3iLOKjU3CxxjgmjnwXKeXbqb1bkl7Cos52BZzZFlKR3a8adLh3DZyJ5EtrJDpaaywDHGY6rKx9sKmf3xThZvzicqQhjeK4lzBnQjI6U9mSnx7s/2xERFhrpcT1ngGOORGp+f11bmMfvjnWw5UEZKh3bcdHYWM8b2omtC892f1JpY4BjTzGrr/Ly6Io8H39/GnpJKBvXoyF8uG8q3TkslNrpt78EcjwWOMc3EV+fntVV7ePD9reQWVXJaz0R+f8lgzuzXxforctnImx6rv1v8ZNx3331UVFjf8S2Z36+szi3hbws3c87fPuCXr6wlMa4ds68dxRs//gYT+3e1sAlgezgea6x7iqa47777mDFjBvHxLf86jHBSVVvHks0FvLfxAIs353OwrIYIgVEZnfn1hYM4d6CFTGMscDwW2D3FpEmT6Nq1Ky+99BLV1dVccskl/Pa3v6W8vJzp06eTl5dHXV0dd9xxBwcOHGDv3r2cddZZpKSksHjx4lB/lbC3t6SSZ5bu5oVluRSV15AQG8XE/l05Z0BXzuzXhaT27UJdYosXPoHzzu2wf13zvmf3ITDlnq9cJbB7ioULF/LKK6/w+eefo6pMmzaNDz/8kIKCAlJTU3n77bcB5x6rTp068be//Y3FixeTkpLSvHWbJlNVlu0q5t+f7GL+hv2oKucO7MZ3x/VmbJ9koiOtVeJEhE/gtAALFy5k4cKFDB8+HICysjK2bt3KhAkTuPXWW7ntttuYOnUqEyZMCHGl4a282sen2wv5cGsBH24pYFdhBR1jo/jBGZnMGNu7Vdxq0FKFT+AcZ08kGFSVWbNmccMNNxy1bOXKlcybN49f//rXnHPOOdx5553HeAfjhYNl1azbU8r6vFI+2V7I8t1F1NYpcdGRjOubzA1n9uWiYanEtwufPxev2Bb0WGD3FOeffz533HEHV199NR06dGDPnj1ER0fj8/no3LkzM2bMIDExkSeeeOJLr7VDquZVWlnLs5/tZuXuYtbvOcT+Q1/0QTSgewLXnZHJmVldGJmR1Oav/A02CxyPBXZPMWXKFK666irGjRsHQIcOHXjmmWfYtm0bv/jFL4iIiCA6OppHHnkEgJkzZzJ58mRSU1Ot0bgZ1Pj8PLN0Nw++v5XiilpO6dqBcX2TOTW1I0PSOjEotSMJsTZUspese4o2JNy+b1OpKvPW7efPCzaxu7CC8ackM2vKQAandQp1aW2GdU9hDLAt/zC/fGUtK3NK6N8tgae+N5qJduVvyFjgmDbrzdV7mPXaOuKiI9t8tw+tRZsPHFUNi//N2sqhcXOo9tXxf3M38p+luxnVO4l/XDWiWUePNCevTQdObGwshYWFJCeHbuCvYFBVCgsLiY21P6q84gp+8uxK1uSVcv2ETH45eYBdnNeCtOnASU9PJy8vj4KCglCX4rnY2FjS09NDXUZIzV+/n9teXYvfrzw6YySTB3cPdUmmgTYdONHR0V8adM60TYVl1fxmzgbmrt3HqakdeeiqES1+uJRw1aYDx7R9b6/dx51vrudQVS23TurHDyf2tUOoFszTfxkRmSwim0Vkm4jc3sg600UkW0Q2iMhzDZZ1FJE8EfmHl3Wa1md/aRU/emYFP3luJamJcbx10xncdE6WhU0L59kejohEAg8Bk4A8YJmIzFHV7IB1soBZwHhVLRaRrg3e5nfAh17VaFqX/MNVzF+/n7fX7uPz
XUVER0Twy8n9mTmhD1EWNK2Cl4dUY4BtqroDQEReAC4CsgPWuR54SFWLAVQ1v36BiIwEugHzgeNewWjaDl+dn/zD1ewrrWRPSRV5xRV8sLmAz3cVoQqndO3ATWdncemINHonW1tNa+Jl4KQBuQHTecDpDdbpByAiHwORwF2qOl9EIoC/AjOAcxv7ABGZCcwE6NWrV/NVbjx3sKya5z/LIf9wNUUVNZRU1FBUXktxeQ0FZdXU+b98XVF9yEwd2oN+3RJCVLX5ukLdaBwFZAETgXTgQxEZghM081Q176uun1HVx4DHwLmXyvNqTbP4eNtBfvriag6WVZMYF01SfDuS2rcjLTGWwakd6d4plh6d4uiRGEuq+7Oj3VTZJngZOHuAngHT6e68QHnAZ6paC+wUkS04ATQOmCAiPwY6AO1EpExVj9nwbFqH2jo/f1+0hUc+2E7fLh14+roxDOzRMdRlmSDyMnCWAVkikokTNFcAVzVY5w3gSuApEUnBOcTaoapX168gItcCoyxsWrfcogpufmEVq3JKuGJ0T+781iDr0CoMefYvrqo+EbkRWIDTPjNbVTeIyN3AclWd4y47T0SygTrgF6pa6FVNJjQ+2X6QG/6zAhQevHI43zotNdQlmRBp0/3hmNDLP1zFlPs+IjE+mqeuHUOvZOsPuC2y/nBMyPn9yq0vraGs2sfzM8da2BgbedN458n/7uSjrQf59dRBdirbABY4xiPr8kr584JNnDeoGzNOt2ukjMMCxxxX/uEqHl6yjdW5JU1av7zax80vrCK5fQx/unRom+6LyJwYa8MxjfLVOaMc/HXhFg5X+4DNfGdUT345uT/JHWIafd1dczawq7Cc534w1oa/NV9igWOOaWVOMb9+fT3Z+w4xISuF2yYP4K01e3nyvzt5Z/0+bj2vP1ef3uvITZOVNXVs3H+IDzYX8PKKPG486xTG9U0O8bcwLY0FjjmisqaONXklvLYyj5eW59G9YywPXz2CKYO7IyIMTuvE5aPSuWtONr+Zs4HnP8+hf/cENuw9xI6CMupvfxp/SjK3nJsV2i9jWiS7DieMlVX7+GBzAct3F7FydzEb9h7C51eiIoTrzsjk5nOy6BBz9P9Jqsr89fv50/xN1Pj8DErtxKmpHZ1HWidSO8Vau01LdHg/fPoQ7FkJIiARX/yMT4Ez/he6DTqpt27qdTgWOGGoqraOpz/dxSNLtlNcUUtsdASnpScysncSozKSGNEricR4a3tpM0py4eP7YeXT4K+FtFEQEQmqoH7ncXAr1ByG4d+Fs34FCd1O6CPswj9zlBqfnxeX5fDg+9vIP1zNhKwUfnLWKYzsnWQ95bVFB7c6QbPmeUBg2JUw/qeQ3PfodSuK4IM/w7LHYd0rcMZPYdyN0K55L9a0PZwwMX/9fn43N5s9JZWMzkji5+f15/Q+1qjb5hRuh+w3YMPrsH8dRMbAyGvgGzdDYs+mvX7RnbBpLiSkwuVPQa+xx32Z7eEYwGlveXjJdv6yYDODenTkD98ewjezUqyNpS05tBfWvgjrX4P9a5156WPg/D/A4MtO7PAouS9c8Szs/gQW/wGSMpq1VAucNqy2zs+vX1/Pi8tzuXhYKn+6bCgxUZGhLss0B181bJ4Hq56F7e857TDpo52QGXQRdPqaY5T1/gZcO7d5ag1ggdNGHa6q5cfPruSjrQe56exT+NmkfrZX01r5/XB4HxTtgOKdsG8NrH8VKouhYxqc8TMYdtWx22ZaGAucNmhvSSXX/WsZ2/LL+POlQ5k+ugnH7qZlUYXls+Hzx6BoJ9RVf7EsMgYGXAjDr4Y+ZzlnnFoJC5w2ZtP+Q1wz+3Mqqut46nujmZDVJdQlmRNVfhDevBG2vOO0xZw+E5IyoXMmdO4DHdMhsnX+6bbOqs0xfb6ziO//exnx7SJ56YfjrL/g1mjbe/DGj5zDpfP/CKf/ECLaziULFjhtxIIN+7np+VWkJ8Xx9HVjSE+yzq5aJF81bF3k
BEpsJ+cRlwgxCfD547D0YegyAGa8Bt0Hh7raZmeB0wY8/3kOv3p9HUPTE5l97Wg62x3aLYsq7F0Fq5+DdS9D1Vd08zFmJky6G6LjgldfEFngtHL/eH8r9y7cwsT+XXj46hE2EkJLUFvlnE0q2gEHsp0zSgUbISoWBkx1rvhN6QeVJVBV6j5KnPaZ3t8IdfWest/OVmzhhv3cu3ALlwxP48+XDbXbE4LNX+fcPrBvjXPB3f51zpW6h/YAAVfwp4+BqffBqZc4h0/1EsOvJ0QLnFaqvNrHXXM2MKB7goVNsO34AN7/PydgfJXOvKhY6DoIMsZD577O3kpyH+d5YMiEOQucVur+97ayt7SKB68abmETTLmfw/NXQIduMOp70OM06D7UOURqpaeqg8m2UCu0af8hnvzvTq4Y3ZORvTuHupzWyVcNUY13k3pM+Rvh2cshoTtctwA6dPWmtjbMAqeV8fuVX72+nk5x0dw2eUCoy2n5/HWw8wMnLA5ucdpcDm6B8gIY8T9wwV8hqgln9Upy4D/fdkLqu69b2JwkC5xW5qXluazYXcy9l59mHZQfT9EOeOPHkPOpMx2XBCn9od9kp6e7lU87jbzT/wPtv6KrjvKD8J9LoKYcvjev2e+gDicWOK1IYVk1f3xnE6dndubSEWmhLqflUoUV/4IFv3LuM5r2IPS/8OhQyfgmvPkTeOJsuPJF6HqMPcbqw/DMpVCaB999o01ejBdMFjityB/mbaKixsfvLxlsd3435tA+mHMTbFsEmWfCRQ813vHU0Mud+5OevxKenOR0NtX3HCjYBDs/gl0fwq7/QtUhuPJ56D0uuN+lDbLAaSU+2FLAqyvz+PHEvpzS1YbNPUplsdM3zEf3OhfeTfkLjP7B8e9DSh8F17/vhM6zl0N8stO+A9CpF/S/AIZ+B/qc6f13CAOeBo6ITAbuByKBJ1T1nmOsMx24C+dKqTWqepWIDAMeAToCdcDvVfVFL2ttyXYXlnPz86vo3y2Bm8624Ve+ZN9apx/etS8718RkTICpf4eUE9hOiT3huvnw7l1Qfch5j8wJ1lbjAc8CR0QigYeASUAesExE5qhqdsA6WcAsYLyqFotIfdN/BfA/qrpVRFKBFSKyQFWbNtZsG1JW7eP6p5cjAo//zyji2rWevk9Oyr61zhmhumrw1Xzx018Lfp/7qIO6Wtj1kdMgHBXnHB6Nvh56DD25z43pABfe27zfxRzFyz2cMcA2Vd0BICIvABcB2QHrXA88pKrFAKqa7/7cUr+Cqu4VkXygCxBWgeP3K7e+tJrtBeU8fd0YeiW38TvAN70NL1zV9PU794Hzfu90RBWX5F1dptl4GThpQG7AdB5weoN1+gGIyMc4h113qer8wBVEZAzQDtjuXakt04Pvb2PBhgPcMXUQ409JCc6H1lZBeX7w7/M5tNc5Y9R9KFz0D6dXu6h27s8YiIyGiCjnIZHO2SdrOG91Qt1oHAVkAROBdOBDERlSf+gkIj2A/wDXqKq/4YtFZCYwE6BXr7Z1I9yCDfv5+7tbuHREOteNz/DmQ/x+2LfK6Tph72rnUbDROWxJGwVjf+R0yB0Z/fU+x1ftdPLdWJcL/jp4baaz3mWzT6z9xbQqXgbOHiDwfGS6Oy9QHvCZqtYCO0VkC04ALRORjsDbwK9UdemxPkBVHwMeA2dcqmauP2TW7ynlZy+u5rT0Tt6dAq/zwSvXwsa3nOm4zpA6DLImOTcbrvg3vPp9WHgHjLkeRl4L8ce5jULVudco59MvOvwu2ulcwxIdD9MegCGXHf26j+9z2mOmPWhh08Z5GTjLgCwRycQJmiuAhgfobwBXAk+JSArOIdYOEWkHvA48raqveFhji+Kr8/PPD3dw/7tbSWofzT+/O4rYaA8aif1+51qVjW/BxP8Hp13hHEIFBts491qWpQ/De791RmXMmuT059LvvC+3mVSVwtqXnE6/890muviUL/p36dwHti92AixvGUz63Re3E+Qt
h/d/D4MudoaZNW2aZ4Gjqj4RuRFYgNM+M1tVN4jI3cByVZ3jLjtPRLJxTn//QlULRWQG8E0gWUSudd/yWlVd7VW9obZ5/2F+/vIa1u0p5cKhPbh72qkkdzjBmwubQhUW/D9Y8xxMnAUTbzv2ehER0O9853EgG5Y94TTqbpzjtKP0Hu/cIlCw0RkatrYCegyDbz0Ag6Yd3Yg74VZnRMelDzuHcJf/C9p1cEKoYyp8635rkwkDNtRviPnq/Dz6wXbuf28rHWOj+d3Fg7lgSA/vPnDJn2DJH+D0H8HkP57YH7nfD3tXOsPAbnrbuQkyOt45TBr5PUgbcfz3WP8qvHmTM2Z1t1Nh54fwvXeaNJysabmaOtSvBU4IqSrXP72Cdzce8Havpt7SR2H+bXDaVc4l/193NIDiXc6eTGynE3td/iZ46btOYE2cBRNv/3p1mJCzscVbgblr9/HuxgPcNnkAP5rowaiJfj+U7HK6ZshZCp884LTBTHuweYYeOdkrcbsOcG4n2LHEuXXAhA0LnBAprazl7rnZDEnrxMxv9mm+N87f5NwpnfuZcxNibcUXy/pfAJc+2TJ6potJgIHfCnUVJshawG9eeLp3wWYKy6qZfc1oIiO+ZmOpvw42vwOf/9NpE4mMgV6nw4hroOtAp62kywDn8n1jQsgCJwRW55bwzGe7uWZcBkPST7D9o56v2rnvaOcS55qZ0lxnCNhz7nSCpn2Qrkw25gRY4ASZr87PrNfW0S0hllvP69f0F9bVwpb5TltM7uewbzXU1TjLMibA+X9wDplawuGSMY2w384ge+rjXWzcd4hHZ4wgIbaJtwzsWelcqHdgvXO4lDocTr/BGe+o5xinU29jWgELnCDaU1LJ3xZt4ZwBXTn/1CaERE0FLPkjfPoPZ1iS6U9DvylN6/TbmBbIAieIfjtng/PzolOPf3/Uzo+cvZrinU6bzKS7bUA10+pZ4ATJgUNVLMw+wM1nn0J60nH6tfnsn/DOLyEpE655CzK/GZwijfGYBU6QvLvxAAAXDk396hULNjt3aGed795v1MY73TJhxcaIDZJ3sw/Qq3M8/bp9xbUw/jqnE6p28U4nVBY2po2xwAmC8mofH28v5NyB3b667Wbpw073DRfcayM7mjbJAicIPtpaQI3Pz6RB3Rpf6eBWeP//nAHbBl8avOKMCaImBY6IjBWRhIDpjiLSsH9i04iF2QfoFBfNqIxGOvquP5SKioWpf7N+YUyb1dQ9nEeAsoDpMneeOQ5fnZ/Fm/I5q38XoiMb2dyfPercbDnlz3YRn2nTmho4ogEd57gdmtsZriZYmVNCcUUtkwY1EiSF2+G9u50L+oZOD25xxgRZUwNnh4jcLCLR7uMWYIeXhbUVi7L3Ex0pfLPfMW6mrCiCl69xhkGZ+nc7lDJtXlMD54fAN3A6Q68fX2qmV0W1FarKouwDjOubcvR9UxVF8PRFULAFLp0NHT3sVtSYFqJJh0XuiJhXeFxLm7O9oIxdhRV8/4zMLy84Ejab4YrnIOvc0BRoTJA1KXBE5CngqM6PVfW6Zq+oDVmUnQ/AuYGnwy1sTBhrasPv3IDnscAlwN7mL6dtWZS9n8FpHenRyR1x0sLGhLmmHlK9GjgtIs8D//Wkojai4HA1q3JL+Ok5bidbqvDcdAsbE9ZO9tR2FmDX3n+F9zcdQBXOHeRupr2rvrhtwcLGhKmmtuEc5os2HAUOAL/0qqi2YFF2PmmJcQzq0dGZsf5ViIg+9tjaxoSJph5SJYhIZ5w9m9j62Z5V1cpV1tTx320FfGdUT+dmTb8f1r/mjM3dcAhcY8JIU/dwfgDcAqQDq4GxwKfA2d6V1not2ZxPVa2f8+q7Ec35BA7vhcG/C21hxoRYUy/8uwUYDexW1bOA4UCJZ1W1cnPX7iO5fTtOz+zszFj3ijMGd/8poS3MmBBrauBUqWoVgIjEqOomoL93ZbVe5dU+3tt0gClDuhMVGQG+Gsh+wxnCpV37UJdn
TEg19SxVnogkAm8Ai0SkGNjtXVmt1/ubnMOpqfVdie5YDJXF1lhsDE3cw1HVS1S1RFXvAu4AngQuPt7rRGSyiGwWkW0icnsj60wXkWwR2SAizwXMv0ZEtrqPa5r2dUJv7tq9dEmIYXRGwOFUbCL0PSe0hRnTApzwdTiq+kFT1hORSOAhYBLODZ/LRGSOqmYHrJMFzALGq2qxiHR153cGfgOMwjkbtsJ9bfGJ1htMZdU+Fm8u4KoxvZzxwmsqYNPbzt6NjSVljKddjI4BtqnqDlWtAV4ALmqwzvXAQ/VB4t4kCnA+sEhVi9xli4DJHtbaLN7NPkCNz8/Uoe6d31vmQ225HU4Z4/IycNKA3IDpPHdeoH5APxH5WESWisjkE3gtIjJTRJaLyPKCgoJmLP3kzF27l+4dYxnRy73WZv2r0KE79B4f2sKMaSFC3Yl6FM7FhBOBK4HH3cbpJlHVx1R1lKqO6tKli0clNk1pZS0fbjnIhUN7EBEhUFkCWxfC4G9DRGRIazOmpfAycPYAPQOm0915gfKAOapaq6o7gS04AdSU17Yoi7IPUFPn58L6w6lNc6GuBgbb4ZQx9bwMnGVAlohkikg7nA685jRY5w2cvRtEJAXnEGsHsAA4T0SSRCQJOM+d12K9vXYvaYlxDO/p7qCte9kZqjdtRGgLM6YF8SxwVNUH3IgTFBuBl1R1g4jcLSLT3NUWAIUikg0sBn6hqoWqWgT8Die0lgF3u/NapJKKGj7aepCpQ3s4904d2gs7P3Qai62fYmOO8HTkBVWdB8xrMO/OgOcK/Mx9NHztbGC2l/WdqLJqH7sOlnNqascvjaC5YMN+fH794nDq04cAgeEzQlOoMS2UDfVyAu54Yz2vr9pDny7tmT6qJ98ekUbXhFjmrt1Hr87xDEnrBOWFsHw2DLkckjJCXbIxLYoFThPlFlUwZ81eJvbvQnm1j3ve2cRfFmzmrP5d+WR7ITd8s4+z1/PZI1BbAWf8b6hLNqbFscBpoic+2kGEwD3fHkr3TrFsLyjjpeW5vLpiD6rKtGGpUFUKnz0GA78FXQeEumRjWhwLnCYoLKvmxeW5XDwsje6dnP7H+nbpwKwpA/n5ef3ZX1pFz87x8NFfoboUJtwa4orAXqecAAAPq0lEQVSNaZlCfeFfq/DvT3ZRVevnhjP7HLUsOjLCCZuaCvj0YTjlXEgdHoIqjWn5LHCOo7zax78/3c2kQd04pWtC4yuufBoqDsKEnwevOGNaGQuc43hhWS6llbX88My+ja/kq4FPHnDumeo9LnjFGdPKWOB8hdo6P09+tIMxGZ0Z2fsrOj9f8zwc2gMTjrqcyBgTwBqNv8Kc1XvZW1rF/10y2NmL+exR2PURdEyDpN6Q6D7++3foMcw62TLmOCxwGuH3K//8cDv9uyVwVuQ6eOR2KNwKyVmQtxwqG9xp8Z1n7DYGY47DAqcRizfnU5G/g5d6v4U8+x507gNXvQz9znNWqD4MJTnOw1cFA6aGtmBjWgELnEZsW/QE78XcR7vCKDjnNzDuJxAV88UKMQnQ7VTnYYxpEgucY6it83N24XMUx/em+w/fhE5HdTZojDkJdpbqGLLzCunNPsp6TrSwMaYZWeAcw5aNa2kndaRknhbqUoxpUyxwjuHgznUAJPYeHOJKjGlbLHAaUFX8+ZuciZR+oS3GmDbGAqeBnQfLSfPtpiwu1cYCN6aZWeA0sHxXMVmyB+nSP9SlGNPmWOA0sHxnAX0j9hKfZtfXGNPc7DqcBnJ3biaWWrA9HGOane3hBMg/XEVc6TZnoot1EWpMc7PACbDCbb8B7AyVMR6wwAmwbFcx/SL3ogk9IK7JQ5wbY5rIAifA8t1FDI3Zb2eojPGIBY6rvNrHhr2l9K7LhRQLHGO8YIHjWpVTQld/Ie38FXaGyhiPWOC4lu0qIivCbTC2M1TGeMICx7V8dxHjOxU6ExY4xnjC08ARkckisllEtonI7cdYfq2IFIjIavfxg4Bl
fxaRDSKyUUQeEPGuw+DaOj+rckoYGX8A4pOhfbJXH2VMWPPsSmMRiQQeAiYBecAyEZmjqtkNVn1RVW9s8NpvAOOBoe6s/wJnAku8qHXjvkNU1NTRR/Ns78YYD3m5hzMG2KaqO1S1BngBuKiJr1UgFmgHxADRwAFPqsS5/gaUxPId1mBsjIe8DJw0IDdgOs+d19ClIrJWRF4RkZ4AqvopsBjY5z4WqOpGrwpdtrOIoUk1RFSX2B6OMR4KdaPxW0CGqg4FFgH/BhCRU4CBQDpOSJ0tIhMavlhEZorIchFZXlBQcNJFrMgp5vwuJc6E3dJgjGe8DJw9QM+A6XR33hGqWqiq1e7kE8BI9/klwFJVLVPVMuAd4KhBu1X1MVUdpaqjunTpclJFHqqqpeBwNafF7Hdm2B6OMZ7xMnCWAVkikiki7YArgDmBK4hIj4DJaUD9YVMOcKaIRIlINE6DsSeHVDmFFQD0rMuFmE6Q0N2LjzHG4OFZKlX1iciNwAIgEpitqhtE5G5guarOAW4WkWmADygCrnVf/gpwNrAOpwF5vqq+5UWduUVO4CRX7XIajG24XmM842kHXKo6D5jXYN6dAc9nAbOO8bo64AYva6uX4wZOfOm2L4bxNcZ4ItSNxiGXU1RBRlwVEeX51n5jjMcscIoqGNvxoDNhgWOMp8I+cHKLKhga415TaBf9GeOpsA4cX52fvOJK+kXsgeh46Jge6pKMadPCOnD2lVbh8ytpvhzngr+IsN4cxngurP/C6k+Jd67YYe03xgRBWAdOTlEFcVQRU7Efkk8JdTnGtHlhHziZke49WMl9QluMMWEg7ANnRIdiZ6KzBY4xXgv7wDk11u1WNCkztMUYEwbCPnD6RuU73YrawHfGeC5sA6e0spaSilpS/fvscMqYIAnbwDlyl3h1ngWOMUES1oHTjlpiK2wPx5hgCdvAySmqoKfkI6gFjjFBEraBs7uogsFx7hkqCxxjgiJsAye3qIIhcUXOhAWOMUERtoGTU1RBv+h8iO0EcUmhLseYsBCWgeOr87OnuJKeut/Zu7F+jI0JirAMnPpuKVJq99jhlDFBFJaBk1tUQRQ+OlTutcAxJojCMnB2F1WQJgcRrbPAMSaIwjJwcooq6Bvp9mNsgWNM0IRt4JwWb91SGBNsYRk4uUUVDIgpgHYdoP3JjUlujDlxYRk4OUUV9JYD0DnTTokbE0RhFzj13VJ09+21TreMCbKwC5zcogoi8NOx0q7BMSbYwi5wcooqSJVCIrTWAseYIPM0cERksohsFpFtInL7MZZfKyIFIrLaffwgYFkvEVkoIhtFJFtEMpqjJqf9Zr8zYYFjTFBFefXGIhIJPARMAvKAZSIyR1WzG6z6oqreeIy3eBr4vaouEpEOgL856sopqmBQzEFQLHCMCTIv93DGANtUdYeq1gAvABc15YUiMgiIUtVFAKpapqoVzVFUblEFg2ILISoWEno0x1saY5rIy8BJA3IDpvPceQ1dKiJrReQVEenpzusHlIjIayKySkT+4u4xfW27C92rjJMybSxxY4Is1H9xbwEZqjoUWAT8250fBUwAfg6MBvoA1zZ8sYjMFJHlIrK8oKDguB/mq/Ozp6SS1Drrx9iYUPAycPYAPQOm0915R6hqoapWu5NPACPd53nAavdwzAe8AYxo+AGq+piqjlLVUV26HP+K4X2lVfj9dSRW73Eu+jPGBJWXgbMMyBKRTBFpB1wBzAlcQUQCG1GmARsDXpsoIvUpcjbQsLH5hMVER/D/JiQR5a+2PRxjQsCzs1Sq6hORG4EFQCQwW1U3iMjdwHJVnQPcLCLTAB9QhHvYpKp1IvJz4D0REWAF8PjXralrQizXn4oTZxY4xgSdZ4EDoKrzgHkN5t0Z8HwWMKuR1y4ChjZ7UUU7nJ8WOMYEXagbjYOvaAdEREOn9FBXYkzYCc/AScqAiGY5y26MOQHhGTh2OGVMSIRX4KhC0U4LHGNCJLwCp7wAasoscIwJ
kfAKnOrD0PsM6Dow1JUYE5Y8PS3e4iT3he+9HeoqjAlb4bWHY4wJKQscY0zQWOAYY4LGAscYEzQWOMaYoLHAMcYEjQWOMSZoLHCMMUEjqhrqGpqFiBQAu4+zWgpwMAjlNEVLqaWl1AFWy7G0lDrgq2vprarH7ee3zQROU4jIclUdFeo6oOXU0lLqAKulJdcBzVOLHVIZY4LGAscYEzThFjiPhbqAAC2llpZSB1gtx9JS6oBmqCWs2nCMMaEVbns4xpgQCovAEZHJIrJZRLaJyO1B/uyeIrJYRLJFZIOI3OLO7ywii0Rkq/szKUj1RLrjtc91pzNF5DN327zoDloYjDoS3fHkN4nIRhEZF8Jt8r/uv816EXleRGKDtV1EZLaI5IvI+oB5x9wO4njArWmtiBw1Gq0HtfzF/TdaKyKvi0hiwLJZbi2bReT8pnxGmw8cEYkEHgKmAIOAK0VkUBBL8AG3quogYCzwE/fzbwfeU9Us4D13Ohhu4YsRTgH+BPxdVU8BioHvB6mO+4H5qjoAOM2tKejbRETSgJuBUao6GGfQxisI3nb5FzC5wbzGtsMUIMt9zAQeCUIti4DBqjoU2II7jpz7O3wFcKr7mofdv7Wvpqpt+gGMAxYETM8CZoWwnjeBScBmoIc7rwewOQifnY7zC3w2MBcQnAu5oo61rTysoxOwE7cNMWB+KLZJGpALdMbpAXMucH4wtwuQAaw/3nYA/glceaz1vKqlwbJLgGfd51/6O8IZYXfc8d6/ze/h8MUvVL08d17QiUgGMBz4DOimqvvcRfuBbkEo4T7gl4DfnU4GSlTV504Ha9tkAgXAU+7h3RMi0p4QbBNV3QPcC+QA+4BSnKGlQ7Fd6jW2HUL9u3wd8M7XqSUcAqdFEJEOwKvAT1X1UOAydf6L8PR0oYhMBfJVdYWXn9NEUcAI4BFVHQ6U0+DwKRjbBMBtH7kIJwRTgfYcfVgRMsHaDscjIr/CaR549uu8TzgEzh6gZ8B0ujsvaEQkGidsnlXV19zZB0Skh7u8B5DvcRnjgWkisgt4Aeew6n4gUUTqO9MP1rbJA/JU9TN3+hWcAAr2NgE4F9ipqgWqWgu8hrOtQrFd6jW2HULyuywi1wJTgavdADzpWsIhcJYBWe5Zh3Y4DV1zgvXhIiLAk8BGVf1bwKI5wDXu82tw2nY8o6qzVDVdVTNwtsH7qno1sBi4LFh1uLXsB3JFpL876xwgmyBvE1cOMFZE4t1/q/pagr5dAjS2HeYA/+OerRoLlAYcenlCRCbjHIZPU9WKBjVeISIxIpKJ05D9+XHf0OtGuZbwAC7AaWHfDvwqyJ99Bs4u8Vpgtfu4AKf95D1gK/Au0DmINU0E5rrP+7i/KNuAl4GYINUwDFjubpc3gKRQbRPgt8AmYD3wHyAmWNsFeB6n7agWZ8/v+41tB5xG/ofc3+N1OGfWvK5lG05bTf3v7qMB6//KrWUzMKUpn2FXGhtjgiYcDqmMMS2EBY4xJmgscIwxQWOBY4wJGgscY0zQWOAYY4LGAsd4TkSGicgFAdPTmqubEBH5qYjEN8d7Ge/ZdTjGc+6l8aNU9UYP3nuX+95NHkpFRCJVta65azHHZ3s45ggRyXA7w3rc7ZBqoYjENbJuXxGZLyIrROQjERngzr/c7chqjYh86N5OcjfwHRFZLSLfEZFrReQf7vr/EpFHRGSpiOwQkYluR1AbReRfAZ/3iIgsd+v6rTvvZpwbLheLyGJ33pUiss6t4U8Bry8Tkb+KyBpgnIjcI06naGtF5F5vtqg5SjAuHbdH63jg9IXiA4a50y8BMxpZ9z0gy31+Os69WeBccp/mPk90f14L/CPgtUemcTp9egHnsv2LgEPAEJz/DFcE1FJ/eX8ksAQY6k7vAlLc56k490Z1wbkj/X3gYneZAtPd58k4l+NLYJ328P5hezimoZ2qutp9vgInhL7E7WrjG8DLIrIap2OoHu7ij4F/icj1OOHQFG+p85e/Djig
qutU1Q9sCPj86SKyEliF08vcsXptHA0sUefO7/quFL7pLqvDuWMfnD5vqoAnReTbQMVR72Q8EXX8VUyYqQ54Xgcc65AqAqeDqmENF6jqD0XkdOBCYIWIjDyBz/Q3+Hw/EOXejfxzYLSqFruHWrFNeN9AVeq226iqT0TG4NwZfhlwI053HcZjtodjTpg6HYjtFJHL4Ujn3qe5z/uq6meqeidOr349gcNAwtf4yI44nXSVikg3nL596wW+9+fAmSKS4vaveyXwQcM3c/fQOqnqPOB/cfpUNkFgezjmZF0NPCIivwaicdph1gB/EZEsnDaZ99x5OcDt7uHXH0/0g1R1jYiswulCIhfnsK3eY8B8Edmrqme5p9sXu5//tqoeqx+bBOBNEYl11/vZidZkTo6dFjfGBI0dUhljgsYOqcxXEpGHcPr4DXS/qj4VinpM62aHVMaYoLFDKmNM0FjgGGOCxgLHGBM0FjjGmKCxwDHGBM3/B97mkn/K2FXGAAAAAElFTkSuQmCC\n",
- "text/plain": [
- ""
- ]
- },
- "metadata": {
- "needs_background": "light"
- },
- "output_type": "display_data"
- }
- ],
- "source": [
- "steps = np.arange(1, MAX_ESTIMATORS, 3)\n",
- "srfc = StreamingRFC(n_estimators=3,\n",
- " max_n_estimators=np.max(steps),\n",
- " **params)\n",
- "\n",
- "%time train_scores, test_scores = inc_partial_fit(x, y, srfc=srfc, steps=steps, sample=0.1)\n",
- "print(f\"With {len(srfc.estimators_)}: {train_scores[-1]} | {test_scores[-1]}\")\n",
- "plot_auc(steps, train_scores, test_scores)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "#### Forest of partial decision trees\n",
- "1 estimator per 10 % subset with all features"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 18,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "'auto'"
- ]
- },
- "execution_count": 18,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "params.pop('max_features', None)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 19,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "CPU times: user 18.7 s, sys: 46.9 ms, total: 18.8 s\n",
- "Wall time: 19.1 s\n",
- "With 119: 0.6836834655810746 | 0.6486341405033996\n"
- ]
- },
- {
- "data": {
- "image/png": "iVBORw0KGgoAAAANSUhEUgAAARwAAAELCAYAAAALJznDAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAIABJREFUeJzt3Xl8VNXd+PHPN/u+Q4AECMoiCMgSEdy1VkFbH318xKU+Ll3oU9tfba222tpW2/qrra1Vf231Uau2tdW61RURRNxXNgXZwiaEQBLIMtlmkpk5vz/ODYSQQAhzZyYz3/frNS9m7tyZ+80lfDn33HPOV4wxKKVUOCREOgClVPzQhKOUChtNOEqpsNGEo5QKG004Sqmw0YSjlAobTThKqbDRhKOUChtNOEqpsEmKdAChUlRUZMrKyiIdhlJxadmyZbuNMYMOtV/MJJyysjKWLl0a6TCUiksi8nlf9tNLKqVU2GjCUUqFjSYcpVTYaMJRSoWNJhylVNhowlFKhY0mHKVU2MTMOBylYpE/ECQxQRARAkFDZX0rG6qbESA1OYE9ze00eTtISBDSkhLJTE3E57efOa40j9yMZHbUt9ltIuRlJJOWnEhDazs1TT52N/sYlpfOiIIMWtsD1LW0U9fSTlZqEsML0hmSk0ZSYujaJZpwlAozYwyV9W0s31bP2p1NbK5tJugsLV7t8eLxdiBAQ1sHDa0dJCYIyYmCtyMY9lj/+tUZnDb2kAOI+0wTjlK98PkDrN/VxIbqZrLTkijKSgGgsa2DqgYv7f4g/mCQTTUtNPv8jBuSTUu7nzVVHvwBQ2KC4O0IEDSGtOREjIHWjgBbapvxeP0AJCcKIwszSU5MwBhDcU4aRw3KxBjISU+iKCuVjkCQdn+QjJQkhuSmMW5INoliv7soO5XstCSMgbb2AK3tAVKSEvB2BFixvYFWn5/S/AwyUhLpCARpaO3A6w+Ql5HC4OxUCjNT2F7fyo76NrLSkshLT6EgM4Umr5/K+lYmDM0J6Tl1NeGIyGzgHiAReMgYc0cP+8wFbgUM8Ikx5nJn+2+B87D9TIuA64yWmFBHyOcP8FmVh7U7PVRUN7O9rpXdLe20+vwkJyaQmZpIbnoyHq+fldsbaPcfulVRkJlCVmoSL6/aSXKiMG5INunJifj8hvSURASbHEQgOzWJ86cMY9yQHKYOz2PckGySQ3jJ0tXEktw+7TemONuV4/fEtYQjIonAn4AvApXAxyLygjFmTZd9xgA3AycZY+pFZLCz/UTgJGCys+s7wGnAG27Fq2JXi8/PgtW7eHZFJR9vrd+bRDJTEhlRmMmg7FSG5qThDwZp9vmpavCSnJTAlTNHMnVEPscMzabVF2BPiw8RISs1iZK8dNKSExARctOT9x4nKVFITUqM5I8b1dxs4cwANhpjNgOIyBPAfwBruuzzDeBPxph6AGNMjbPdAGlACiBAMlDtYqwqRrS2+/m0spGK6iaqGr1UVDfzdkUtPn+QEQUZXDlzJOVl+UwsyaUkLx0RCdmxM1O1h+JQ3DxDJcD2Lq8rgRO67TMWQETexV523WqMWWCMeV9ElgA7sQnnj8aYtd0PICLzgHkAI0aMCP1PoAaUpVvr+PY/l1Pt8QG2f2RYXjqXzRjBuZOGcnxZfkgTjDp8kU7JScAY4HSgFHhLRCYBRcB4ZxvAIhE5xRjzdtcPG2MeAB4AKC8v1/6dOFTb5GPx2mre27SH+at2UpKfzkNXljOxJJfB2akkJGiCiSZuJpwdwPAur0udbV1VAh8aYzqALSKygX0J6ANjTDOAiLwCzALeRsUtfyDImxtqefWzXWyqbaG1PcD6XR6CBgZlp3Jx+XBumnPM3j4VFX3cTDgfA2NEZBQ20VwKXN5tn+eAy4BHRKQIe4m1GTgK+IaI/Bp7SXUacLeLsaoo1tYeYMX2en710lrW7PSQnZrEpNJcctKSOGv8aM6d
NJRjhmTr5dIA4FrCMcb4ReQ7wKvY/pmHjTGficgvgKXGmBec984WkTVAALjRGLNHRJ4GzgRWYTuQFxhjXnQrVhV9lm+r56mllby7cTfb6loBGJqbxj2XTmHOxKGkJOmsnIFIYmVoS3l5udElRgeuZp+fV1btZMvuFlbtaOTtit1kpSYx86hCpgzPZWRhJmceM1jvBEUpEVlmjCk/1H76t6cipsXn5+VVO/lg0x4Wrqmm2ecnKUEozknjh7PHcdWsMk0wMUb/NpXrajxePqvyUO3xkpqcQGZKEi3tfu5csJ6qRi8FmSmcfWwxXzlhJFOH5+mdpRimCUeFnDGG1Ts8LFqzi4Vrqlm3q6nH/cYWZ/HEJTM5YVSBdvjGCU04qt+MMazZ6eG9jXtYWdlAXXM7e1p8bK9ro60jQIJAeVkBPz73GKYMz2dYXhrt/iAtvgBef4DjSvO08zfOaMJRh60jEOSppZX89b2trK+2rZfOtVNGFmZy8uhBHDsshzOOGUxBZkqEo1XRRBOO6pExhoqaZnY2epk6Io+ctGQaWzt4/pMdPPLuVrbsbmFyaS6/umAiZ08oZnBOWqRDVgOAJhwFwI6GNtZWeahp8rF0ax1vb9xNbZOdk5SYIGQkJ9Lks2u4TCzJ4aEry/nC+MHa96IOiyacOLS72ceHm+tYs7ORbXVtrN/lYUN18973CzJTOGl0ESePLmRYXjofb63H09ZBUVYKp40dzKTSvq2zolR3mnDiiLcjwGMffM5dizbQ2h4gKUEoyU9nZGEmc8uHM31kPoNz0hiak7bfrelTxoRuiUkV3zThxIGNNU18558r2FDdRNDAGeMGcd1ZYxk/NFsXi1JhpQknxrX7g1z3xEqqPV6+c+YYykfmc8qYIu17URGhCSeGGWP4w2sb+KzKw/1XTGf2xCGRDknFOU04MWrL7hZ++dIaXl9Xw8XTSzXZqKigCSfGbKxp5tYXPuOdjbvJSEnklvPGc/WJZZEOSylAE05M2V7Xylce+oCOgOH6L47l0hnDGZytA/JU9NCEEyOWfV7H9//1Cd6OIE9+cxbjhoSv1pBSfeXqzDkRmS0i60Vko4jc1Ms+c0VkjYh8JiL/7LJ9hIgsFJG1zvtlbsY6EAWDhvc27ubafyzjovvep90f5JFrjtdko6JWVBbCc/wNuN0Ys0hEsoDwF1aOYsYYrn9yJc+trCInLYnvnDGaa884mowUbbSq6BWVhfBEZAKQZIxZ5GxvRu3nD69V8NzKKv7PmaP59hmjSUvWAXwq+rl5SdVTIbySbvuMBcaKyLsi8oFTi7xze4OIPCsiK0TkTqfFtB8RmSciS0VkaW1trSs/RLSpamjjB09+wr2LK5hbXsr1XxyryUYNGJFuf/dWCC8JOAWYCmwD/gVcDfyl64fjqRCeMYYnPt7OL15cQ8AYvnnqUfzg7HE6YlgNKNFaCK8SWNnlcuw5YCbdEk68+GhLHfcuruCdjbs5eXQRd1w0idL8jEiHpdRhi9ZCeA1AnogMMsbUYmtUxWUNmD8s2sA9iysozEzh1i9P4MpZZbrIuBqworIQHoCI3AAsFnvNsAx40K1Yo9X8VTu5Z3EF/zm1hNsvnER6ivbVqIFNC+FFoWDQ9tf88qU1HDM0myfmzdRlJFRU00J4A8yG6iZ2N/tYU+Xh2eU7WLPTw8yjCrj3sqmabFTM0IQTBR7/aBs3P7tq7+tjh+Xwu4uP46JpJXoXSsUUTTgR5vF28LtX11M+Mp8fnD2Okrx0RhTqHSgVmzThRNifl2yirrWdv54/g4kluji5im2acCKkvqWdu1/bwGMfbuPCqSWabFRc0IQTAUvW13DjU59Q19LOZTNG8KM5x0Q6JKXCQhNOGBljuGdxBXe/VsExQ7L521dPYMKwnEiHpVTYaMIJE2MMt7+8lofe2cJF00q5/cKJOulSRY9gEBKcudx+H7TWQVs95I2A1KyQHUYTTpi8+OlOHnpnC1efWMbPvjRBpyeoyAn4
YfMSaNwOviZY+yJUfgzp+SAJ0Lpn375XvQSjTgnZoTXhhIHPH+DOV9cxfmgOP9VkM7B4PfD5u1A42j4G0rioYBB2roSKRZCUAhmFsGM5rH8Fmnft2694Ipz0PZt8TBByhkFmEaTlwaDQ9i9qwgmDxz7Yxva6Nv7+tUkkarKJPgE/fP4ONDj/4yelgs8DlUth42Lwt9n98kbA1P+G3OGw5U2bjJJSYORJMG4O5JYe+N3GQMPnUP0ZZA6GYVMgMRnaW2Hb+9BcAwEfbHjVtjIGT4CSafYfetVKqFhok1xaHuSPhKR08DXaP1OzALGfb2uAxkrbamlvtdtM5yKZAjhTmFJzoOwUmHI5lEyHxBTILAzDSXYi0blU7mpt93PSHa8zsSSXv3/thEiHozo1bIc1z0PdJli/AJqqDtwnvwyOOgMmnA/1W+3+m9+w72UUQfZQ8DZC4zZITIVzbofjv27f37kS1rwAq5+Ghm37vjMhGVIyoaMVAu37tmcPhVGnQs1aqFkDQb9NBkefCckZ9jKn4XPwt0NaDvi9NjmC3S8tD3KGQt5Im4gSU21iyxsJY8+xz5tr7M+UEPq+Q51LFSWe+Gg79a0dfO+ssZEOJb61NdjkUr8Vtn8MSx+2rYC0PBg+A+b8xrY+UrJsp2lSKmQU7P8d5V+Fus32H3rxpH2drLsrYMHNMP8GeP1XgLGJSBLh6DPgpOtgyGTwVEHVCptsktOh7FQoGGVbMHkj9yUCf7s9TvYQSM8L3TlIjfzi+ppwXNQRCPKXd7Ywo6yA6SPzIx1OfKrbDC9dbztJO0kCHHcZnPYje5lyOAqOOnBb0Ri4/ElY8XfY9am9lCk9HsacbftCujr2gkMfIykFBsfm2CxNOC56bsUOdjS08csLjo10KPHDGKjfYjtHNyyAtS/Zy4nTb4biY23C6LzsCKWEBJh+VWi/MwZpwnHJmxtq+enzq5lUkssZ4wYf+gPqyGx9B5b/HTYthhZnQf30Aph8sU02OcMiG58CXE44ThWGe7Ar/j1kjLmjh33mArdiu9E/McZc3uW9HGxZmeeMMd9xM9ZQ+nDzHr7+148ZMzibR645XpeYcENTte3ArVkD2z+Cbe/Z/pjRZ0HZyTD0ONtvkqj/p0aTaC6EB/BL4C23YnRDRyDIT55bzZDcNB6fN5Pc9ORIhxRbqlbA4l/algzYuz4FR8E5v4bya2xnrIpaUVkIz9l/OlAMLAAOebstUiqqm3h2xQ7GDM7imCE5vFVRy8aaZh68slyTTagEg7D2eVj6iB3/kl4Ap//Y3u4dMsmV27zKHW4mnJ4K4XUfiDIWQETexV523WqMWSAiCcDvgSuAs3o7gIjMA+YBjBgxInSRH4aH3t7Cv5Zu32/b6eMGcdZ47bc5YsGgHeX72s9hxzI78O7MW2DGPEjT5TwGokhf4PZWCO8KYL4xpvJg/R/RUAjv0x2NnDy6iJ9/eQLrdjWxdXcL/1Veqv023VWvsbeoR39h/8seY+yAtIREewt58xvwwf3Q3gx7NtkBeVnFcOH/wqS5+8a+qAEpWgvhzQJOEZFrgSwgRUSajTE3uRjvYWtrD7ChuolvnXY0Y4qzGVMc+YFVUadqJbz/J1j1FGBsx+6Ys+0t6qrl9u5S52TBgqPt4LzsYXZEbGk5jP8yjDs39LexVUREZSE8Y8xXOncQkauB8mhLNgBrdjYSCBoml8Z5877DC+tesneFBo21rz/7N3x4vx3in5RuR9uWnQKf/sv2w6x6ErKGwNjZ9nPtTfD5e3DcpXDidyE5LdI/lXJB1BbCGwg+2d4IwHHDQzj8/EgFA/ZSpGhMeGY279kET10Fu5yqE7kjwLMDTMBOQJxzpx0Lk+6MtB5zlr2Mat1jZy/rpWdccbUPxxgzH5jfbdvPujw3wPXOo7fveBR41J0Ij8yqHY0U56RSnBMl/xsHg/DsN2D1M3Ym8Kk/hHGzYfdGePM3dhJheh7MvsPO4ekPY+zYF68H1r8MHz0I
SWlw0V/sbOWq5TB5rl1DZdRpPScUkQOH/Ku4EOlO4wHtk8oGJpVESevGGHj1xzbZHHcZbP8QHr/EDoTb9qGdPzR0Mnz+Ptx/CoyYaTtxy6+BWc6YShO0++2ugNp1Nmmk5+9/jJevtxMfARB7CXTmLT0vzaBUN5pw+snj7WBzbQsXTimJdCg2ESz6GXx4H8y8Fs75v3Z5g3fvgTd/a8eqzP2rTQoN2+DF79nWSEYhLLwF1s232z2V7Ld2SnKG7WNJzoCUDHs3ac1zcML/2DEw+aP631JScUkTTj+9vtaOUZwyIsItnIAfFtwEHz9o12I5+3Z7yZKYDKfeYJdUSMvdNzgubwT897P2uTHwzl12QF3JVJj23xDosHeI8svgk8dhy1u25eNrtotSnXQdnHWb9r2oftGE0w8+f4DfLVzPhKE5nHh0BPsiWnbDU1fD1rftZdHZvzowEXRf06UrETjlB/bRk+5r2QY6bCJTqp804fTD3977nMr6Nh772uTILRna4YV/XGw7cC+4H6Zc5v4xNdmoI6QJ5zA1tLbz/16v4LSxgzh5TARbN6/80N4RuuQfMP5LkYtDqcOg48QP072LN9Ls8/Pjc8dHLoiNi2H5X+Hk6zXZqAFFE85h2Lq7hb9/sJVLjh/OuCERnMaw/G/2DtPpN0cuBqX6QRPOYbjvjU0kJybw/UguiN5aB+vn24mMSSmRi0OpftA+nMOwakcjJ4wqYHAkRhb7muxiU6ufseVFpnSflqZU9NOE00fBoGHz7mZmHR2+omEArH4WXvkRtNTYAXhJabZEydDJ4Y1DqRDQhNNHVY1teDuCHD0ojMskVK2E575lJ0HO/JatqlixCE4cMMs7K7UfTTh9tLm2BYCjBmX27wsatsMTl0FOia3mOOMbvS+NueYFuxDVupdthccrntHJjiomaMLpo021zQD9b+G88weoWWerKm5YABsXwZfvtdMOui4u9eH/2jE2ablQNA7O+50mGxUzNOH00abaZnLSkijK6sedoaZdsOIx29F7/r12tvX8G+EPE+z7w6bZ+tU16+DTJ+CYL8HFj+rIXhVzNOH00ebaFo4alNW/tYrf/xMEO+zER7ATKodNs/WUvA32ztNrt9qlIKZdCef+XpONiklRWQhPRKYA9wE52JUAbzfG/MvNWA9lU20zJ48edPgf3PkpfPQAHPufUHj0vu3DptgHwKk32qUfsgbrLGwV06K1EF4rcKUxpkJEhgHLRORVY0yDW/EeTLPPT7XH1/cO44ZttlVTejwsvs3WUZr96973F4Hs4tAEq1QUi8pCeMaYDZ07GGOqRKQGGAREJOFsPpwOY2PguWvtkhEf3g+JKXDNAtt6USrORWUhvK47iMgMIAXY1P0A4SqE90mlXSz96IO1cFY9bdenCfhssjnvLhg0DhJToXS6a7EpNZBEutO4x0J4nZdOIjIU+DtwlTEm2P3D4SiEV9Pk5a6F6zmuNLf3Fs6mJfDM19m7NGfZKbZjWPtjlNpPtBbC+1hEcoCXgZ8YYz5wMc5eGWP48bOraG0P8Pu5U0joabEtT5VNNoPGwQX32QF7ky/RZKNUD6KyEJ6IpAD/Bv5mjHnaxRgPakN1M6+treFHs49h9OBeWjdv/96Wpb1mvk06JdPCG6RSA4hry1MYY/xAZyG8tcCTnYXwROR8Z7dXgT1OIbwl7CuENxc4FbhaRFY6jyluxdqb5dvqAZg9cUjPOwSDsPZFW7p20LgwRqbUwBSVhfCMMY8Bj7kZW18s/7ye/Ixkygozet6h8mNorobx5/f8vlJqP7oA10Es31bP1BH5vY8uXveiXaNm7NnhDUypAUoTTi8aWtvZVNvCtJ7qTgX89rH2RTjqNDvRUil1SJG+LR61Vm63Ywynjcjf/432FvjTTGjcZl+f/P0wR6bUwKUJpxfLtzWQIDB5eLcWzscP2WQz89uQkgmTLo5MgEoNQJpwerFiWz1ji7PJSu1yinxN8M7dcPQXYPb/jVxw
Sg1Q2ofTA58/wNKt9Rxf1q1M7kcPQlsdnPGTyASm1ADXp4QjIjNFJLvL6xwR6T4vKmYs21pPW0eAU8d2W45i3csw/ASdG6VUP/W1hXMf0NzldbOzLSa9WVFLcqLsX6Eh0AG7VtklJ5RS/dLXhCPOID0AnImUMdv/89aG3Uwfmb9//03tOjsTfNjUyAWm1ADX14SzWUS+KyLJzuM6YLObgUVKTZOXtTs9B15OVa2wf2rCUarf+ppw/gc4ETsJs3Ndm3luBRVJb2/YDcBpPSWc1FzIHxWBqJSKDX26LHJW4rvU5Viiwodb9lCQmcL4ITn7v1G1AoYdBwl6Y0+p/upTwhGRR9i7utQ+xpivhjyiCPu0spHJpbn7r33j98Gu1TDr2sgFplQM6GvH70tdnqcBFwJVoQ8nstraA1TUNHP2hG4LmtessWVetP9GqSPS10uqZ7q+FpHHgXdciSiC1uz0EAgaJpZ0m4y55S37pyYcpY5IfzskxgAxV4Zg9Q67WPrk0i7zp/zttvzuyJMhvywygSkVI/o60rhJRDzOoxF4EfhhHz43W0TWi8hGEbmpl33misgaEflMRP7ZZftVIlLhPK7q6w90JD6tbKQoK5XinNR9G1c/DZ4dcPL3whGCUjGtr5dU2SJSgG3ZpHVuPthnjqQQnnOsnwPlznGWOZ+tP6yf7jCt2tHA5NLcfQtuBYPw7j1QPBFGn+XmoZWKC31t4XwdeBNYgC3L2/nnwewthGeMaQc6C+F11WMhPOAcYJExps55bxEwuy+x9ldru5+NNc1M6tp/s2OZHWE881qtwqBUCPS1D+c64Hjgc2PMGcBUDl0Fs6dCeCXd9hkLjBWRd0XkA6cWeV8/G1Jrd3oIGvZPOBsWgCTCMee6eWil4kZfb4t7jTFeEUFEUo0x60QkFGUKeiyE19cPh7Ly5ra6VgBGFSRBw3bIG24TzohZkJ5/iE8rpfqiry2cShHJw9aRWiQizwOfH+IzfS2E94IxpsMYswXoLITXl89ijHnAGFNujCkfNGhQ97cPy85GLwAj3/4h/LEcNr0O1ath7DlH9L1KqX36lHCMMRcaYxqMMbcCPwX+AlxwiI/tLYTnFLa7FHih2z7PYVs3dC2Eh61XdbaI5ItIPnC2s801uxq9jE/bQ9KaZ8Hvhcedmn1jXe06UiquHPY4HGPMm8aYF5yO4IPt1+9CeMaYOuCX2KT1MfALZ5trdjV6+VaK02fzxV+Avw0KjoKiMW4eVqm4EpWF8Jz3HgYedjO+rloaqjmn4zWYcgmc+F1oroHB4/XulFIhFLOLaB2uYY0rSTU+mHa1TTLn3B7pkJSKObrWAtDuD5LprbYvdPqCUq7RhINd5W+I1BFISIaMwkN/QCnVL5pwsB3GxVJHe3qxLrCllIv0Xxd2DM5QqcNkD4t0KErFNE04OC0c6kjK04SjlJs04QC7GtsYKnUk55dGOhSlYpreFgc89bWkSQfkaAtHKTdpCwfwN1TaJ5pwlHKVJhxAPDvtE+00VspVcZ9wgkFDmneXfaEtHKVcFfcJp661ncHUYRDIHhLpcJSKaXGfcHY3+xhCHb7UIkhMjnQ4SsW0uE84tU0+hkg9gSxt3SjlNk04TT6GSJ323ygVBnGfcHY324Sjg/6Ucp+rCedQhfBE5GoRqRWRlc7j613e+61THG+tiNwr4s5KWHWNHvKkheTcoW58vVKqC9dGGvelEJ7jX8aY73T77InAScBkZ9M7wGnAG6GO09tgS2FJ1pEtwq6UOjQ3Wzh9KYTXG4Ot8JkCpALJQLUbQfqbau2TTE04SrnNzYTT12J2F4nIpyLytIgMBzDGvI9dVH2n83jVGLPWjSBNsyYcpcIl0p3GLwJlxpjJ2HK+fwUQkdHAeGw9qhLgTBE5pfuHRWSeiCwVkaW1tbX9CiChbbd9oiv9KeU6NxPOIYvZOSVhfM7Lh4DpzvMLgQ+MMc3GmGbgFWBW9wMcaSG8jkCQtI56+0JbOEq5zs2E
c8hCeCLS9dbQ+dj6VQDbgNNEJElEkrEdxiG/pKpraacQDwFJhtTsUH+9Uqob1+5SGWP8ItJZCC8ReLizEB6w1BjzAvBdpyieH6gDrnY+/jRwJrAK24G8wBjzYqhjrG3yUYCH9rRC0rX+lFKui3QhvJuBm3v4XAD4ppuxAdQ2+ygUD8GMIrcPpZQi8p3GEVXb5KNQGknI1ISjVDjEdcLZ3eyjkCaSc4ojHYpScSGuE45t4XhIytY7VEqFQ1wnnMbGBjLEB3pJpVRYxHXC8TXaeVQ6Bkep8IjrhBNochKO3qVSKiziNuEYYzAtzrQGbeEoFRZxm3A8Xj85wQb7QvtwlAqLuE041R4vBTTZF5pwlAqLuE44heIhkJQOKZmRDkepuBC3CWdXo5dCacSk67IUSoVL3CacmiY7yjhBlxZVKmziNuHsavRSnOghIWtwpENRKm7EbcKp9ngpEg9oC0epsInfhNPYRr5p1DE4SoVR3CacVk8dSfghUy+plAqXaC6EN0JEFjqF8NaISFmo4goEDbToPCqlwi0qC+E5/gbcboxZJCJZQDBUse1p9tnLKdA+HKXCKCoL4YnIBCDJGLMIwKne0BqqwHZ5vBSJk3C0haNU2ERlITxgLNAgIs+KyAoRudNpMYVEtccuvAVoH45SYRTpTuMeC+FhL/VOAW4AjgeOYl9Fh736Wwiv2mnhGAQyCo7wR1BK9VW0FsKrBFY6l2N+4DlgWvcD9LcQXkNrO0V4bLXNhJA1nJRShxCthfA+BvJEpDOLnAl072zutyavn0EJHkRHGSsVVlFZCM8YExCRG4DFIiLAMuDBUMXm8XZQnOCBTK3WoFQ4RWUhPOe9RcBkN+LyeP220zhzkhtfr5TqRaQ7jSOiyeungAa9Ja5UmMVlwvG1NZNh2nTQn1JhFpcJJ6FVF09XKhLiMuGkeOvsE004SoVVXCacjPY99omOMlYqrOIu4XQEgmRpeRilIiLuEk6T109+Z3mYDF1AXalwisOE00G2tGFI0PIwSoVZHCYcP1m04U/OApFIh6NUXHF1pHE08jgtnGBKVqRDUTGio6ODyspKvF5vpENxXVpaGqWlpSQnJ/fr8/GXcNpsC8ek5kQ6FBUjKisryc7OpqysDInv6zQZAAANkklEQVThVrMxhj179lBZWcmoUaP69R1xeEnVQTatkJod6VBUjPB6vRQWFsZ0sgEQEQoLC4+oJReHCcdPlrSRmKYtHBU6sZ5sOh3pzxmfCYc2EtM14ajY0NDQwJ///OfD/ty5555LQ0ODCxH1Lu4SjsfbQY60kZCeG+lQlAqJ3hKO3+8/6Ofmz59PXl6eW2H1KO46je04HO3DUbHjpptuYtOmTUyZMoXk5GTS0tLIz89n3bp1bNiwgQsuuIDt27fj9Xq57rrrmDdvHgBlZWUsXbqU5uZm5syZw8knn8x7771HSUkJzz//POnp6SGP1dWEIyKzgXuwK/49ZIy5o9v7VwN3sm+t4z8aYx7q8n4OdmnR53qpXXXYWtq8pNEOepdKueC2Fz9jTZUnpN85YVgOP//ysb2+f8cdd7B69WpWrlzJG2+8wXnnncfq1av33kl6+OGHKSgooK2tjeOPP56LLrqIwsL9R9lXVFTw+OOP8+CDDzJ37lyeeeYZrrjiipD+HBDdhfAAfgm8Fcq4/G3OL4MmHBWjZsyYsd9t63vvvZd///vfAGzfvp2KiooDEs6oUaOYMmUKANOnT2fr1q2uxOZmC2dvITwAEekshNenxdBFZDpQDCwAykMVVGBvwtFLKhV6B2uJhEtm5r4pO2+88QavvfYa77//PhkZGZx++uk93tZOTU3d+zwxMZG2tjZXYovKQngikgD8HluXKrS8mnBUbMnOzqapqanH9xobG8nPzycjI4N169bxwQcfhDm6/UW60/hF4HFjjE9EvokthHcmcC0w3xhTebD7/iIyD5gHMGLEiL4dsV0TjoothYWFnHTSSUycOJH09HSKi/dVI5k9
ezb3338/48ePZ9y4ccycOTOCkbqbcPpUCK/Ly4eA3zrPZwGniMi1QBaQIiLNxpibun3+AeABgPLyctOXoBLam20Xtg78UzHkn//8Z4/bU1NTeeWVV3p8r7OfpqioiNWrV+/dfsMNob+w6ORmwtlbCA+baC4FLu+6g4gMNcbsdF7uLYRnjPlKl32uBsq7J5v+8HYESAu02ISjncZKhV1UFsJzS5PXT7Y4nWF6SaVU2EVtIbwu+zwKPBqKeJq8HWShCUepSImrqQ15GSmcMzoDI4mQnBHpcJSKO5G+SxVWBZkpFBQnQnW2rvanVATEVQsHAF+TdhgrFSHxmXD0lriKIf1dngLg7rvvprW1NcQR9S4OE45HO4xVTBlICSeu+nAAO7UhSytuqtjRdXmKL37xiwwePJgnn3wSn8/HhRdeyG233UZLSwtz586lsrKSQCDAT3/6U6qrq6mqquKMM86gqKiIJUuWuB5r/CUcXxMUHh3pKFSseuUm2LUqtN85ZBLMuaPXt7suT7Fw4UKefvppPvroI4wxnH/++bz11lvU1tYybNgwXn75ZcDOscrNzeWuu+5iyZIlFBWFpwptHF5Saaexil0LFy5k4cKFTJ06lWnTprFu3ToqKiqYNGkSixYt4kc/+hFvv/02ubmRWfEyPls42oej3HKQlkg4GGO4+eab+eY3v3nAe8uXL2f+/PnccsstfOELX+BnP/tZD9/grvhq4QQ6wN+mLRwVU7ouT3HOOefw8MMP09zcDMCOHTuoqamhqqqKjIwMrrjiCm688UaWL19+wGfDIb5aOD7nxOptcRVDui5PMWfOHC6//HJmzZoFQFZWFo899hgbN27kxhtvJCEhgeTkZO677z4A5s2bx+zZsxk2bFhYOo3FmD6t6hD1ysvLzdKlSw++U/1WuOc4uOA+mHL5wfdVqo/Wrl3L+PHjIx1G2PT084rIMmPMIVfmjK9LKoCRJ0FOTwsPKqXcFl+XVPllcM38Q+6mlHJH/LVwlFIRowlHqRCIlb7QQznSn9PVhCMis0VkvYhsFJEDlggVkatFpFZEVjqPrzvbp4jI+yLymVPR4RI341TqSKSlpbFnz56YTzrGGPbs2UNaWlq/vyNaC+G1AlcaYypEZBiwTEReNcaEt/K6Un1QWlpKZWUltbW1kQ7FdWlpaZSWlvb781FZCM8Ys6HL8yoRqQEGAZpwVNRJTk7er9Kl6l1UFsLrSkRmACnAJnfCVEqFS6Q7jV8Eyowxk4FF2EJ4e4nIUODvwDXGmGD3D4vIPBFZKiJL46E5q9RA52bC6VMhPGOMz3n5EDC98z0RyQFeBn5ijOmxPqkx5gFjTLkxpnzQoEEhDV4pFXpRWQhPRFKAfwN/M8Y83ZeDLVu2bLeIfH6I3YqA3X3/EVwTLXFA9MSicRwoWmLpSxwj+/JF0VoIby5wKlDoVN4EuNoYs/IgxztkE0dElvZlvofboiUOiJ5YNI4DRUssoYwjKgvhGWMeAx5zMzalVPhFutNYKRVH4i3hPBDpABzREgdETywax4GiJZaQxREz6+EopaJfvLVwlFIRFBcJ51CTSF0+9nARWSIia5zJqNc52wtEZJGIVDh/5ocpnkQRWSEiLzmvR4nIh865+ZczJMHtGPKckeXrRGStiMyK4Pn4vvP3slpEHheRtHCdExF5WERqRGR1l209ngex7nVi+lREprkcx53O38+nIvJvEcnr8t7NThzrReScwzlWzCecLpNI5wATgMtEZEIYQ/ADPzDGTABmAt92jn8TsNgYMwZY7LwOh+twxjs5fgP8wRgzGqgHvhaGGO4BFhhjjgGOc+IJ+/kQkRLgu0C5MWYidvjGpYTvnDwKzO62rbfzMAcY4zzmAfe5HMciYKIzC2ADzt1k53f3UuBY5zN/dv6N9Y0xJqYfwCzg1S6vbwZujmA8z2Nn0K8HhjrbhgLrw3DsUuwv8ZnAS4BgB3Ql9XSuXIohF9iC03/YZXskzkfnfL8C7BCR
l4BzwnlOgDJg9aHOA/C/wGU97edGHN3euxD4h/N8v38/2HF2s/p6nJhv4dD3SaSuE5EyYCrwIVBs9o2y3gUUhyGEu4EfAp3z0gqBBmOM33kdjnMzCqgFHnEu7R4SkUwicD6MMTuA3wHbgJ1AI7CM8J+Trno7D5H8Pf4q8Eoo4oiHhBMVRCQLeAb4njHG0/U9Y/+rcPV2oYh8Cagxxixz8zh9kARMA+4zxkwFWuh2+RSO8wHg9I/8BzYJDgMyOfDSImLCdR4ORkR+gu0W+Ecovi8eEs4hJ5G6TUSSscnmH8aYZ53N1c5s+M5Z8TUuh3EScL6IbAWewF5W3QPkiUjniPNwnJtKoNIY86Hz+mlsAgr3+QA4C9hijKk1xnQAz2LPU7jPSVe9nYew/x4704q+BHzFSX5HHEc8JJy9k0iduw2XAi+E6+AiIsBfgLXGmLu6vPUCcJXz/Cps345rjDE3G2NKjTFl2HPwujHmK8AS4L/CGMcuYLuIjHM2fQG7KFtYz4djGzBTRDKcv6fOWMJ6Trrp7Ty8AFzp3K2aCTR2ufQKORGZjb38Pt8Y09otvktFJNWZmD0G+KjPX+x2x1w0PIBzsT3tm7DLXYTz2Cdjm8WfAiudx7nY/pPFQAXwGlAQxphOB15ynh/l/MJsBJ4CUsNw/CnAUuecPAfkR+p8ALcB64DV2LWXUsN1ToDHsX1HHdiW39d6Ow/YDv4/Ob/Dq7B31tyMYyO2r6bzd/b+Lvv/xIljPTDncI6lI42VUmETD5dUSqkooQlHKRU2mnCUUmGjCUcpFTaacJRSYaMJRykVNppwlOvE1oo/t8vr80O1TIiIfE9EMkLxXcp9Og5Huc4ZIl9uDqwhH4rv3up8d5/LqYhIojEmEOpY1KFpC0ftJSJlzoJYDzqLUi0UkfRe9j1aRBaIyDIReVtEjnG2X+wsZvWJiLzlTCf5BXCJiKwUkUtE5GoR+aOz/6Micp+IfCAim0XkdGdBqLUi8miX490ntsrqZyJym7Ptu9hJl0tEZImz7TIRWeXE8Jsun28Wkd+LyCfALBG5Q+yiaJ+KyO/cOaPqAOEYPq6PgfHAroniB6Y4r58Eruhl38XAGOf5Cdi5WWCH3Zc4z/OcP68G/tjls3tfYxd/egI7dP8/AA8wCfuf4bIusXQO8U8E3gAmO6+3AkXO82HY+VGDsLPSXwcucN4zwFzneSF2WL50jVMf7j+0haO622L2FRxchk1C+3GW2jgReEpEVmIXhxrqvP0u8KiIfAObHPriRWP/5a8Cqo0xq4ytJf9Zl+PPFZHlwArsanM9rdp4PPCGsbO/O5dUONV5L4CdsQ923Rsv8BcR+U+g9YBvUq5wtRCeGpB8XZ4HgJ4uqRKwi1RN6f6GMeZ/ROQE4DxgmYhMP+DTvR8z2O34QSDJmZV8A3C8MabeudRK68P3duU1Tr+NsVVhZ2Bnh/8X8B3sch3KZdrCUYfN2AXEtojIxbB3ge/jnOdHG2M+NLbCai127ZQmIPsIDpmDXairUUSKsev7dur63R8Bp4lIkbPO7mXAm92/zGmh5RpbGfb72HWVVRhoC0f111eA+0TkFiAZ2w/zCXCniIzB9sksdrZtA25yLr9+fbgHMsZ8IiIrsMtIbMdetnV6AFggIlXGmDOc2+1LnOO/bIzpaS2bbOB5EUlz9rv+cGNS/aO3xZVSYaOXVEqpsNFLKnVQIvIn7Dq/Xd1jjHkkEvGogU0vqZRSYaOXVEqpsNGEo5QKG004Sqmw0YSjlAobTThKqbD5/x2hRcj0YeBwAAAAAElFTkSuQmCC\n",
- "text/plain": [
- ""
- ]
- },
- "metadata": {
- "needs_background": "light"
- },
- "output_type": "display_data"
- }
- ],
- "source": [
- "steps = np.arange(1, MAX_ESTIMATORS, 1)\n",
- "srfc = StreamingRFC(n_estimators=1,\n",
- " max_n_estimators=np.max(steps),\n",
- " max_features=x.shape[1],\n",
- " **params)\n",
- "\n",
- "%time train_scores, test_scores = inc_partial_fit(x, y, srfc=srfc, steps=steps, sample=0.1)\n",
- "print(f\"With {len(srfc.estimators_)}: {train_scores[-1]} | {test_scores[-1]}\")\n",
- "plot_auc(steps, train_scores, test_scores)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.6.7"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/notes/PerformanceComparisonsDask.ipynb b/notes/PerformanceComparisonsDask.ipynb
deleted file mode 100644
index a72abe7..0000000
--- a/notes/PerformanceComparisonsDask.ipynb
+++ /dev/null
@@ -1,683 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Performance comparisons\n",
- "\n",
- "In memory and out of memory, using dask."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Change dir to repo root if running from repo (rather than pip installed)\n",
- "# (Assuming running from [repo]/notes/)\n",
- "import os\n",
- "os.chdir('../')\n",
- "\n",
- "%load_ext autoreload\n",
- "%autoreload 2"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {},
- "outputs": [],
- "source": [
- "import matplotlib.pyplot as plt\n",
- "import numpy as np\n",
- "\n",
- "from typing import Tuple\n",
- "\n",
- "from incremental_trees.trees import StreamingRFC\n",
- "\n",
- "from sklearn.ensemble import RandomForestClassifier\n",
- "from sklearn.datasets import make_blobs\n",
- "from sklearn.model_selection import train_test_split\n",
- "from sklearn.metrics import roc_auc_score\n",
- "\n",
- "import dask_ml\n",
- "import dask_ml.datasets\n",
- "from dask_ml.wrappers import Incremental\n",
- "from dask.distributed import Client, LocalCluster\n",
- "from dask_ml.model_selection import train_test_split as dask_tts\n",
- "\n",
- "import dask as dd\n",
- "import pandas as pd"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Settings\n",
- "MAX_ESTIMATORS = 60 # Lower to run faster"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "distributed.comm.tcp - WARNING - Could not set timeout on TCP stream: [Errno 92] Protocol not available\n",
- "distributed.comm.tcp - WARNING - Could not set timeout on TCP stream: [Errno 92] Protocol not available\n",
- "distributed.comm.tcp - WARNING - Could not set timeout on TCP stream: [Errno 92] Protocol not available\n",
- "distributed.comm.tcp - WARNING - Could not set timeout on TCP stream: [Errno 92] Protocol not available\n",
- "distributed.comm.tcp - WARNING - Could not set timeout on TCP stream: [Errno 92] Protocol not available\n",
- "distributed.comm.tcp - WARNING - Could not set timeout on TCP stream: [Errno 92] Protocol not available\n",
- "distributed.comm.tcp - WARNING - Could not set timeout on TCP stream: [Errno 92] Protocol not available\n",
- "distributed.comm.tcp - WARNING - Could not set timeout on TCP stream: [Errno 92] Protocol not available\n"
- ]
- }
- ],
- "source": [
- "# Prepare dask cluster\n",
- "cluster = LocalCluster(processes=False,\n",
- " n_workers=2,\n",
- " threads_per_worker=2,\n",
- " scheduler_port=8383,\n",
- " diagnostics_port=8484)\n",
- "client = Client(cluster)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Synthetic data, in memory\n",
- "\n",
- "Compare increasing estimators with RandomForest (using warm_start) against Incremental StreamingRFC (dask handles .partial_fit).\n",
- "\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 5,
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "distributed.comm.tcp - WARNING - Could not set timeout on TCP stream: [Errno 92] Protocol not available\n",
- "distributed.comm.tcp - WARNING - Could not set timeout on TCP stream: [Errno 92] Protocol not available\n",
- "distributed.comm.tcp - WARNING - Could not set timeout on TCP stream: [Errno 92] Protocol not available\n",
- "distributed.comm.tcp - WARNING - Could not set timeout on TCP stream: [Errno 92] Protocol not available\n"
- ]
- }
- ],
- "source": [
- "x, y = dask_ml.datasets.make_blobs(n_samples=1e5,\n",
- " chunks=1e4,\n",
- " random_state=0,\n",
- " n_features=40,\n",
- " centers=2,\n",
- " cluster_std=100)\n",
- "\n",
- "x_dd = dd.dataframe.from_array(x, \n",
- " chunksize=1e4)\n",
- "y_dd = dd.dataframe.from_array(y,\n",
- " chunksize=1e4)\n",
- "\n",
- "x_pd = pd.DataFrame(x.persist().compute())\n",
- "y_pd = pd.DataFrame(y.persist().compute())"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 6,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "30.517654418945312"
- ]
- },
- "execution_count": 6,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "x_pd.memory_usage(deep=True).sum() / 1024 /1024"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Standard random forest"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 7,
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "distributed.comm.tcp - WARNING - Could not set timeout on TCP stream: [Errno 92] Protocol not available\n",
- "distributed.comm.tcp - WARNING - Could not set timeout on TCP stream: [Errno 92] Protocol not available\n",
- "distributed.comm.tcp - WARNING - Could not set timeout on TCP stream: [Errno 92] Protocol not available\n",
- "distributed.comm.tcp - WARNING - Could not set timeout on TCP stream: [Errno 92] Protocol not available\n"
- ]
- }
- ],
- "source": [
- "def score(mod, \n",
- " train: Tuple[np.array, np.array],\n",
- " test: Tuple[np.array, np.array],\n",
- " pr=False) -> Tuple[float, float]:\n",
- " \"\"\"\n",
- " Return ROC auc on x_train and x_test (from caller) on mod. Print if requested.\n",
- " \"\"\"\n",
- " y_pred_train_proba = mod.predict_proba(train[0])[:, 1]\n",
- " y_pred_test_proba = mod.predict_proba(test[0])[:, 1]\n",
- "\n",
- " roc_train = roc_auc_score(train[1], y_pred_train_proba)\n",
- " roc_test = roc_auc_score(test[1], y_pred_test_proba)\n",
- " if pr:\n",
- " print(f\"n_ests: {len(rfc.estimators_)}\")\n",
- " print(f'Train AUC: {roc_train}')\n",
- " print(f'Test AUC: {roc_test}')\n",
- " \n",
- " return roc_train, roc_test\n",
- "\n",
- "\n",
- "def score_dask(mod, \n",
- " train: Tuple[np.array, np.array],\n",
- " test: Tuple[np.array, np.array],\n",
- " pr=False) -> Tuple[float, float]:\n",
- " \"\"\"\n",
- " Score model using available dask metric (accuracy)\n",
- " \"\"\"\n",
- " roc_train = mod.score(train[0], train[1])\n",
- " roc_test = mod.score(test[0], test[1])\n",
- " if pr:\n",
- " print(f\"n_ests: {len(rfc.estimators_)}\")\n",
- " print(f'Train AUC: {roc_train}')\n",
- " print(f'Test AUC: {roc_test}')\n",
- " \n",
- " return roc_train, roc_test\n",
- "\n",
- "\n",
- "def multiple_fit(x: np.array, y: np.array,\n",
- " steps=np.arange(1, 101, 2),\n",
- " sample: int=1):\n",
- " \"\"\"\n",
- " Fit a random forest model with an increasing number of estimators.\n",
- " \n",
- " This version doesn't use warm start and refits the model from scratch each iteration.\n",
- " This is for the sake of comparing timings to dask function below.\n",
- " \n",
- " :param steps: Range to iterate over. Sets total number of estimators that will be fit in model\n",
- " after each iteration. Should be range with constant step size.\n",
- " :param sample: Proportion of randomly sampled training data to use on each partial_fit call.\n",
- " If sample = 1, all training data is used on each interation,\n",
- " so should behave as standard random forest. Default = 1 (100%).\n",
- " \"\"\"\n",
- " \n",
- " x_train, x_test, y_train, y_test = train_test_split(x, y, \n",
- " test_size=0.25,\n",
- " random_state=1)\n",
- " \n",
- " train_scores = []\n",
- " test_scores = []\n",
- " for s in steps:\n",
- " \n",
- " # Fit full model on each iteration\n",
- " rfc = RandomForestClassifier(warm_start=False)\n",
- " \n",
- " # Fit model with these n ests\n",
- " rfc.set_params(n_estimators=s)\n",
- " rfc.fit(x_train, y_train)\n",
- " \n",
- " tr_score, te_score = score(rfc, \n",
- " train=(x_train, y_train),\n",
- " test=(x_test, y_test),\n",
- " pr=False)\n",
- " \n",
- " train_scores.append(tr_score)\n",
- " test_scores.append(te_score)\n",
- " \n",
- " return rfc, train_scores, test_scores\n",
- "\n",
- "\n",
- "def plot_auc(steps, train_scores, test_scores):\n",
- " \"\"\"\n",
- " Plot the train and test auc scores vs total number of model estimators\n",
- " \"\"\"\n",
- " \n",
- " fig = plt.figure(figsize=(4, 4))\n",
- " plt.plot(steps, train_scores)\n",
- " plt.plot(steps, test_scores)\n",
- " plt.xlabel('n_estimators')\n",
- " plt.ylabel('auc')\n",
- " plt.legend(['train', 'test'])"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 8,
- "metadata": {},
- "outputs": [],
- "source": [
- "steps = np.arange(1, MAX_ESTIMATORS, 4)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 9,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "CPU times: user 3min 30s, sys: 1.02 s, total: 3min 31s\n",
- "Wall time: 3min 37s\n",
- "With 57: 1.0 | 0.6337612567122703\n"
- ]
- },
- {
- "data": {
- "image/png": "iVBORw0KGgoAAAANSUhEUgAAARUAAAELCAYAAAD3MhIJAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAHP9JREFUeJzt3Xt4XXWd7/H3N5c2vV+S3tPSKBVbuQQI5XpGLiJtHQuIVorMox61OMrIeEZGekYZ5Hl8xGdGRx0RB7GiR2lBOEiVHmnBIo7cmkIKpS1t6S1poQ2l6ZW0TfI9f/xW2t2QljRda6/svT+v59ld1+z9TZP9yVq/tX6/be6OiEhcitIuQETyi0JFRGKlUBGRWClURCRWChURiZVCRURipVARkVgpVEQkVgoVEYlVSdoFHK+KigofP3582mWIFJylS5e+6e7D3m2/nAuV8ePHU1tbm3YZIgXHzDZ2ZT+d/ohIrBQqIhIrhYqIxEqhIiKxUqiISKwSCxUzm2Nm28xs+VG2m5n9yMzWmtlLZnZWUrWISPYkeaRyLzDlGNunAhOixyzgrgRrEZEsSew+FXd/yszGH2OXK4FfeRjP8lkzG2xmo9z99aRqyrbWNmd/Syv7D7bR3HF6sJX9LYenbe64gxNNHRxwdxwgcxscse/RHHOgUA0jKpG+vUq45uzK2J4vzZvfxgD1GcsN0bp3hIqZzSIczTBu3LisFNdVjbv3s3jVNhat3MqKLbuOCJGDrXrjSs83ZnCfvAmVLnP3u4G7AWpqalJ9p7o7q7fu4fGVW3l85Vbq6ptwh9GDyjinaij9epdQVlJM79KijGkRvUuLKSstonfJ4WnvkiLKSovpVVJEcZFhgFn7FAzDLLyuWefbDKJ/OmfH2GjH+DopHEUx/yKkGSqbgbEZy5XRuh7nYGsbz69/61CQ1L/1NgCnVw7iqx96H5dNHM6kUQMxvUtFUg2V+cCNZjYPOBfY2ZPaU3buO8iTq7exaMVW/ry6kd3NLfQuKeLCkyv4+w+ezGUThzNiYFnaZYr0OImFipnNBS4GKsysAfhXoBTA3X8KLACmAWuBfcBnk6rleDxSt5m5z29iyYYdtLY5Ff17MfXUkXxo4ggumlBB3145ccYokpokr/7MfJftDnw5qdfvjq27mrlpXh1VFf244W/ew4cmjaC6cjBFRTqtEekq/dnN8OKmJgC+N+MMzho3JOVqRHKTbtPPUFffRGmxMWnUwLRLEclZCpUMdfU7mDhqIGWlxWmXIpKzFCqR1jbn5YadVI8dnHYpIjlNoRJZs203ew+0KlRETpBCJVIXNdIqVEROjEIlsqyhiYFlJVRV9Eu7FJGcplCJvLipiTPGDtat9iInSKEC7N3fwuqtuzlTpz4iJ0yhAry8eSdtDtXjFCoiJ0qhQrjpDeCMSoWKyIlSqBCu/Iwb2pfy/r3TLkUk5ylUCFd+dClZJB4FHypbdzXz+s5mzlCoiMSi4EPlRd30JhKrgg+V9p7JHxitnskicVCoqGeySKwKOlTUM1kkfgUdKuqZLBK/gg6VZfVqpBWJW0GHSl196Jk8vlw9k0XiUtCh0t4zWaPli8SnYENFPZNFklGwoaKeySLJKNhQUc9kkWQUbqioZ7JIIgo2VJY1NKkToUgCCjJU2nsm6/4UkfgVZKioZ7JIcgoyVNQzWSQ5BRoq6pkskpSCCxX1TBZJVqKhYmZTzOxVM1trZrd0sv0kM3vCzF4ysyfNrDLJegDWbtvD3gOtuj9FJCGJhYqZFQN3AlOBScBMM5vUYbd/B37l7qcDtwPfSaqednX1OwDdSSuSlCSPVCYDa919nbsfAOYBV3bYZxLwp2h+cSfbY9feM7lKPZNFEpFkqIwB6jOWG6J1mZYBH4vmrwYGmFl5gjWpZ7JIwtJuqP0a8EEzexH4ILAZaO24k5nNMrNaM6ttbGzs9oupZ7JI8pIMlc3A2Izl
ymjdIe6+xd0/5u5nAv8SrWvq+ETufre717h7zbBhw7pdkHomiyQvyVBZAkwwsyoz6wVcC8zP3MHMKsysvYbZwJwE61HPZJEsSCxU3L0FuBF4DFgJPODur5jZ7WY2PdrtYuBVM1sNjAC+nVQ9EMakHTu0j3omiySoJMknd/cFwIIO627NmH8QeDDJGjLV1TdRM35otl5OpCCl3VCbNeqZLJIdBRMq6pkskh0FEyrqmSySHQUUKuqZLJINBREq7T2TdSlZJHkFESrtPZPVniKSvIIIFfVMFsmeAgkV9UwWyZaCCBX1TBbJnrwPFfVMFsmuvA+V9p7J+uAwkezI+1BZVq87aUWyKe9DpU49k0WyqiBCpXrskLTLECkYeR0q6pkskn15HSrqmSySfXkdKnX1TZQUqWeySDbldagsq29Sz2SRLMvbUGltc15qaNKpj0iW5W2oqGeySDryNlTUM1kkHXkcKuqZLJKGvA0V9UwWSUdehkp7z2S1p4hkX16GyvL2z0xWqIhkXV6GSp16JoukJi9DpaJ/b6adNlI9k0VSkOhnKaflmrMruebsyrTLEClIeXmkIiLpUaiISKwUKiISK4WKiMRKoSIisUo0VMxsipm9amZrzeyWTraPM7PFZvaimb1kZtOSrEdEkpdYqJhZMXAnMBWYBMw0s0kddvsG8IC7nwlcC/wkqXpEJDuSPFKZDKx193XufgCYB1zZYR8H2sd6HARsSbAeEcmCJENlDFCfsdwQrct0G3C9mTUAC4B/6OyJzGyWmdWaWW1jY2MStYpITNJuqJ0J3OvulcA04P+Y2Ttqcve73b3G3WuGDRuW9SJFpOuSDJXNwNiM5cpoXabPAQ8AuPszQBlQkWBNIpKwJENlCTDBzKrMrBehIXZ+h302AZcBmNlEQqjo/EYkhyUWKu7eAtwIPAasJFzlecXMbjez6dFu/wR8wcyWAXOBz7i7J1WTiCQv0V7K7r6A0ACbue7WjPkVwIVJ1iAi2ZV2Q62I5BmFiojESqEiIrHqUqiY2XlmNiBjeaCZnZtcWSKSq7p6pHIXsCdjeU+0TkTkCF0NFcu81OvubeTp+LYicmK6GirrzOwrZlYaPW4C1iVZmIjkpq6GyheBCwi32TcA5wKzkipKRHJXl05h3H0b4TZ7EZFj6lKomNkvCGOfHMHd/2fsFYlITutqY+sfMubLgKvRgEoi0omunv48lLlsZnOB/06kIhHJad29o3YCMDzOQkQkP3S1TWU3h9tUHNgK/HNSRYlI7urq6c8AMxtKOEIpa1+dWFUikrO6eqTyeeAmwpCQdcB5wDPApcmVJiK5qKttKjcB5wAb3f0S4EygKbGqRCRndTVUmt29GcDMerv7KuCU5MoSkVzV1ftUGsxsMPA7YJGZ7QA2JleWiOSqrjbUXh3N3mZmiwmfJvjHxKoSkZx13MMXuPufkyhERPKDhpMUkVgpVEQkVgoVEYmVQkVEYqVQEZFYKVREJFYKFRGJlUJFRGKlUBGRWClURCRWChURiVWioWJmU8zsVTNba2a3dLL9P8ysLnqsNjON0SKS4xL7PGQzKwbuBC4nfKrhEjOb7+4r2vdx969m7P8PhMGfRCSHJXmkMhlY6+7r3P0AMA+48hj7zwTmJliPiGRBkqEyBqjPWG6I1r2DmZ0EVAF/SrAeEcmCntJQey3woLu3drbRzGaZWa2Z1TY2Nma5NBE5HkmGymZgbMZyZbSuM9dyjFMfd7/b3WvcvWbYsGExligicUsyVJYAE8ysysx6EYJjfsedzOz9wBDCR36ISI5LLFTcvQW4EXgMWAk84O6vmNntZjY9Y9drgXnurg8nE8kDiV1SBnD3BcCCDutu7bB8W5I1iEh29ZSGWhHJEwoVEYmVQkVEYqVQEZFYKVREJFYKFRGJlUJFRGKlUBGRWClURCRWChURiZVCRURipVARkVgpVEQkVgoVEYmVQkVEYqVQEZFYKVREJFYKFRGJlUJFRGKlUBGRWClU
RCRWiY6mLyI91MFmeOs1eHMNHNwH1dfF9tQKFZF85Q57tsGbq2H7mhAgb64Jy02bgOijtvoNU6iIJG7/7vCGbGuB1oPQdhBaW6LpwbC+s23t64tKoaQMSnpnPMoOT4t7RcuZ6zLeju7Q1nr4+bz1yOVD89G0dT/s2BgFyNowfXMN7N91+DlL+0L5yVBZE0Kk/GSoeB+UvzfW/zqFihSug82wY314E25/LUzfWheme7Zmvx4rgqKSKETauv88A8dAxQQ4/ZMhNCqi8BgwGoqSb0ZVqEh+a22Bpo0hNN6KgmP7Wti+DnbWc+gUAKDf8PBXe8LlMPS9MHA0FJeGo472aVFxxrqSaF1JtC6aWnEIhpb90NKcMW2G1gMd1nWYth/lFJWE1yoqjuZLMtaVhNfIXFdcCoPGhqOP3v1T++8GhYrkA3fYtz0c7re3HWxfG6Y71oc3arveg0JwjDsPyj8V3oRD3xPWlQ1K73vIIwoVyR0Hm6PTk4zgaA+P5qbD+xX3Ckcaw06BiX8L5RNCaJSfDH3LwSy976EAKFSkZ3APwbBrC+zcDLuiR/v8jg3hdCWzrWHAqBAUp34sBEfFhLA8eFw4JZBUKFQkO1pbwlFG06Z3Bkb7/MG9R36NFYXgGDgaxpwNZ1wbhcfJUdvBgHS+FzkmhYrEb/9u2LoC3ngJ3ng5PLatCA2RhxgMGBkCY9j74eQPhfmBY2BQZZjvP/LIy6ySE/QTk+5zh92vR8GRESBvrefQVZU+Q2DkaXDO52HEqTC0KgTGgFHhioXkHYWKdN3urbDpadi89HCA7Nt+ePuQqhAgZ1wHI08N8wPHqGG0wCQaKmY2BfghUAzc4+53dLLPDOA2wp+2Ze4e3/3C0n3u4XLsxmdg49MhTN5aF7YV94bhE+GUaTDy9BAeIz4AZQPTrVl6hMRCxcyKgTuBy4EGYImZzXf3FRn7TABmAxe6+w4zG55UPfIu2lpDu8fGZ0KAbHwG9rwRtvUZAuPOh7M/CyddAKPO0KmLHFWSRyqTgbXuvg7AzOYBVwIrMvb5AnCnu+8AcPdtCdYjmVoOwJYXowB5GjY9B/t3hm0DK6Hqf4QgOekCqDglK7d3S35IMlTGAPUZyw3AuR32eR+Amf2VcIp0m7v/seMTmdksYBbAuHHjEik2Jx3YF+7f2L8LmndC864QDM27onXR+vb5zHUHdh9+nor3wQeuCgFy0gXhPg+Rbkq7obYEmABcDFQCT5nZae7elLmTu98N3A1QU1PjHZ+kIBzYGxpGt9TB63Vh+uarR+94VlQa2jh6DwzTskHQ7z1h2jtaHnlqOBrpV5Hd70XyWpKhshkYm7FcGa3L1AA85+4HgfVmtpoQMksSrKvn278nBEh7eLxeF7qytwdI/xEwqhomTQ9HGX2GZIRFFCSlfXTVRVKRZKgsASaYWRUhTK4FOl7Z+R0wE/iFmVUQTofWJVhTz9OyP7RtbH4h4whkNYfu8+g/EkZXw6SrwnRUNQwclWrJIseSWKi4e4uZ3Qg8RmgvmePur5jZ7UCtu8+Ptn3YzFYArcDN7r796M+aB/bvgYbnQ+PoxqehoTYMsAPhhrBR1aEvy6jqECIDRqZbrxxy8OBBGhoaaG5ufvedc1hZWRmVlZWUlnbvCp+551YTRU1NjdfW1qZdRtftews2PQsb/xpC5PVlYRQvKw6XZk+6ILRrVNYoQHq49evXM2DAAMrLy7E8PbV0d7Zv387u3bupqqo6YpuZLXX3mnd7jrQbavPPrtcPX6bd+HS49wPCDWOVNXDRV0OQjJ2sDnE5prm5mfHjx+dtoACYGeXl5TQ2Nnb7ORQqcXCHpb+Av/4o3IUK0Ks/jD03nMqcdCGMPgtKy9KtU05YPgdKuxP9HhUqJ+rg2/CHr8KyueE05pzPhyORkaerh63Eqqmpifvuu48vfelLx/V106ZN47777mPw4MEJVXYk/dafiB0b4P6/C5d/L/7f8Dc3685TSUxTUxM/+clP3hEq
LS0tlJQc/a28YMGCpEs7gkKlu9Y+Dg99Ptw7ct398L4r0q5I8twtt9zCa6+9RnV1NaWlpZSVlTFkyBBWrVrF6tWrueqqq6ivr6e5uZmbbrqJWbNmATB+/Hhqa2vZs2cPU6dO5aKLLuLpp59mzJgxPPLII/Tp0yfWOhUqx6utDf77e/Cnb8PwSXDtr8PAyVJQvvX7V1ixZde773gcJo0eyL9+9ANH3X7HHXewfPly6urqePLJJ/nIRz7C8uXLD12lmTNnDkOHDuXtt9/mnHPO4ZprrqG8vPyI51izZg1z587lZz/7GTNmzOChhx7i+uuvj/X7UKgcj+ad8PDfw6uPwmmfgI/+EHr1S7sqKVCTJ08+4rLvj370Ix5++GEA6uvrWbNmzTtCpaqqiurqagDOPvtsNmzYEHtdCpWu2rYK7v9UGNVsyh1w7hd1G3wBO9YRRbb063f4D9qTTz7J448/zjPPPEPfvn25+OKLO71Jr3fv3ofmi4uLefvtt2OvS6HSFa88DL/7cjgq+fTvYfyFaVckBWjAgAHs3r270207d+5kyJAh9O3bl1WrVvHss89mubrDFCrH0toCT9wGT/8nVE6GGb9SvxtJTXl5ORdeeCGnnnoqffr0YcSIEYe2TZkyhZ/+9KdMnDiRU045hfPOOy+1OnWb/tHsaYQHPwsb/hLuPbniO1DSK/nXlR5r5cqVTJw4Me0ysqKz71W36Z+IhqXwwN+FQZ2vuguqNWyuSFcpVDpa+ktY8LXQue9zC0OnPxHpMoVKu7Y2ePzW0H7y3kvhmp9D36FpVyWScxQqEPrvPHwDrHgEzvkCTP2uPotXpJsUKnvfhLkzoWEJfPjbcP6Xdf+JyAko7FDZ/hr85uOwawvM+CVMujLtikRyXuF2qd30HNzzoXDr/ad/r0CRHq+9l3J3/OAHP2Dfvn0xV9S5wgyVVx6GX34U+gyGzy0Ko7CJ9HC5EiqFdfrjHq7uLPomjD0Prr0P+pW/+9eJ9ACZQx9cfvnlDB8+nAceeID9+/dz9dVX861vfYu9e/cyY8YMGhoaaG1t5Zvf/CZbt25ly5YtXHLJJVRUVLB48eJE6yycUGltgT9+HZbcEz7u4ur/0vCO0n3/75YwOFecRp4GU+846ubMoQ8WLlzIgw8+yPPPP4+7M336dJ566ikaGxsZPXo0jz76KBD6BA0aNIjvf//7LF68mIqK5D84rjBOfw7sDT2Ml9wDF3wFPv4LBYrktIULF7Jw4ULOPPNMzjrrLFatWsWaNWs47bTTWLRoEV//+tf5y1/+wqBBg7JeW/4fqex+A+77JLzxEnzke6Efj8iJOsYRRTa4O7Nnz+aGG254x7YXXniBBQsW8I1vfIPLLruMW2+9Nau15feRyraV4QrPm2tg5jwFiuS0zKEPrrjiCubMmcOePXsA2Lx5M9u2bWPLli307duX66+/nptvvpkXXnjhHV+btPw9Uln/FMy7PpzmfPZRGH1m2hWJnJDMoQ+mTp3Kddddx/nnnw9A//79+fWvf83atWu5+eabKSoqorS0lLvuuguAWbNmMWXKFEaPHp14Q21+Dn2w7H545MtQfjJ86rcweOyx9xfpAg19UMhDH1hR+OydGb8K96KISNbkZ6ic/gk49Rp9Bo9ICvL3XadAEUmF3nkixyHX2iC740S/R4WKSBeVlZWxffv2vA4Wd2f79u2UlXX/5tD8bFMRSUBlZSUNDQ00NjamXUqiysrKqKys7PbXJxoqZjYF+CFQDNzj7nd02P4Z4N+AzdGqH7v7PUnWJNJdpaWlR3wioHQusVAxs2LgTuByoAFYYmbz3X1Fh13vd/cbk6pDRLIryTaVycBad1/n7geAeYBGQhLJc0mGyhigPmO5IVrX0TVm9pKZPWhmuvVVJMel3VD7e2Cuu+83sxuAXwKXdtzJzGYBs6LFPWb26lGerwJ4M5FKu0f1HFtPqwd6Xk09qZ6TurJTYn1/zOx84DZ3vyJang3g7t85yv7F
wFvu3u0BIMystit9E7JF9RxbT6sHel5NPa2erkjy9GcJMMHMqsysF3AtMD9zBzPL/LTz6cDKBOsRkSxI7PTH3VvM7EbgMcIl5Tnu/oqZ3Q7Uuvt84CtmNh1oAd4CPpNUPSKSHYm2qbj7AmBBh3W3ZszPBmbH+JJ3x/hccVA9x9bT6oGeV1NPq+dd5dx4KiLSs6nvj4jEKi9CxcymmNmrZrbWzG5JqYY5ZrbNzJZnrBtqZovMbE00HZLFesaa2WIzW2Fmr5jZTWnWZGZlZva8mS2L6vlWtL7KzJ6Lfnb3R436WWNmxWb2opn9Ie16zGyDmb1sZnVmVhutS+13qLtyPlQyugNMBSYBM81sUgql3AtM6bDuFuAJd58APBEtZ0sL8E/uPgk4D/hy9P+SVk37gUvd/QygGphiZucB3wX+w91PBnYAn8tSPe1u4sirjmnXc4m7V2dcRk7zd6h73D2nH8D5wGMZy7OB2SnVMh5YnrH8KjAqmh8FvJri/9MjhH5YqdcE9AVeAM4l3NhV0tnPMgt1VBLeqJcCfwAs5Xo2ABUd1qX+8zreR84fqdD17gBpGOHur0fzbwAj0ijCzMYDZwLPpVlTdKpRB2wDFgGvAU3u3hLtku2f3Q+AfwbaouXylOtxYKGZLY3uIoce8jt0PNK+Tb9guLubWdYvtZlZf+Ah4B/dfZeZpVaTu7cC1WY2GHgYeH+2XrsjM/tbYJu7LzWzi9Oqo4OL3H2zmQ0HFpnZqsyNaf0OHa98OFLZDGR2RKzk8PgsadvaftdwNN2WzRc3s1JCoPzG3f9vT6gJwN2bgMWE04vBZtb+xy2bP7sLgelmtoHQg/5Swtg/adWDu2+OptsIoTuZHvDzOl75ECrv2h0gRfOBT0fznya0a2SFhUOSnwMr3f37addkZsOiIxTMrA+hfWclIVw+nu163H22u1e6+3jC78yf3P1TadVjZv3MbED7PPBhYDkp/g51W9qNOjE1cE0DVhPO0f8lpRrmAq8DBwnn4p8jnKM/AawBHgeGZrGeiwjn6C8BddFjWlo1AacDL0b1LAdujda/B3geWAv8Fuidws/uYuAPadYTve6y6PFK++9xmr9D3X3ojloRiVU+nP6ISA+iUBGRWClURCRWChURiZVCRURipVARkVgpVCQWZlZtZtMylqfHNQyFmf2jmfWN47kkebpPRWIRfYRtjSfwaZPRrfQ17t7lj6ows2IPfY0ky3SkUmDMbLyZrTSzn0WDJS2MbpvvbN/3mtkfo16zfzGz90frP2Fmy6MBl56KukfcDnwyGmDok2b2GTP7cbT/vWZ2l5k9a2brzOziaFCrlWZ2b8br3WVmtR0GcfoKMBpYbGaLo3Uzo8GMlpvZdzO+fo+Zfc/MlgHnm9kd0SBVL5nZvyfzPyrvkPYtvXpk90EY86UFqI6WHwCuP8q+TwATovlzCf1jAF4GxkTzg6PpZ4AfZ3ztoWXCAFbzCOOVXAnsAk4j/FFbmlHL0GhaDDwJnB4tbyAaZ4QQMJuAYYRe9n8Croq2OTAjmi8njEVimXXqkfxDRyqFab2710XzSwlBc4RoyIQLgN9GY6D8F2GQIIC/Avea2RcIAdAVv/fw7n4Z2OruL7t7G6GfS/vrzzCzFwh9hD5AGMmvo3OAJ9290cO4J78B/iba1krolQ2wE2gGfm5mHwP2dbFOOUEaT6Uw7c+YbwU6O/0pIgxYVN1xg7t/0czOBT4CLDWzs4/jNds6vH4bUGJmVcDXgHPcfUd0WlTWhefN1OxRO4qHz52aDFxG6HV8I518pK7ET0cq0il33wWsN7NPQBhKwczOiObf6+7PefgMp0bCeDa7gQEn8JIDgb3ATjMbQRhzuF3mcz8PfNDMKqLxiWcCf+74ZNGR1iAPnz31VeCME6hNjoOOVORYPgXcZWbfAEoJ7SLLgH8zswmENpInonWbgFuiU6VOPy/7WNx9mZm9CKwiDA/614zNdwN/NLMt7n5JdKl6cfT6j7p7
Z2OMDAAeMbOyaL//dbw1SffokrKIxEqnPyISK53+CGZ2J2HM1kw/dPdfpFGP5Dad/ohIrHT6IyKxUqiISKwUKiISK4WKiMRKoSIisfr/pCPlFP/s/+8AAAAASUVORK5CYII=\n",
- "text/plain": [
- ""
- ]
- },
- "metadata": {
- "needs_background": "light"
- },
- "output_type": "display_data"
- }
- ],
- "source": [
- "rfc = RandomForestClassifier(warm_start=True)\n",
- "\n",
- "%time rfc, train_scores, test_scores = multiple_fit(x_pd.values, y_pd.values.squeeze(), steps=steps)\n",
- "\n",
- "print(f\"With {len(rfc.estimators_)}: {train_scores[-1]} | {test_scores[-1]}\")\n",
- "plot_auc(steps, train_scores, test_scores)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Single incremental forest specs"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 10,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "((7500, 7500, 7500, 7500, 7500, 7500, 7500, 7500, 7500, 7500), (40,))"
- ]
- },
- "execution_count": 10,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "x_train, x_test, y_train, y_test = dask_tts(x, y, \n",
- " test_size=0.25)\n",
- "\n",
- "x_train.chunks"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "#### Incremental forest\n",
- "1 estimator per subset, 10 % per chunk, 1 pass through data.\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 11,
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "distributed.comm.tcp - WARNING - Could not set timeout on TCP stream: [Errno 92] Protocol not available\n",
- "distributed.comm.tcp - WARNING - Could not set timeout on TCP stream: [Errno 92] Protocol not available\n",
- "distributed.comm.tcp - WARNING - Could not set timeout on TCP stream: [Errno 92] Protocol not available\n",
- "distributed.comm.tcp - WARNING - Could not set timeout on TCP stream: [Errno 92] Protocol not available\n",
- "/mnt/s/OneDrive/Matlab/dask tests/IncrementalTrees/incremental_trees/trees.py:199: RuntimeWarning: invalid value encountered in true_divide\n",
- " norm_prob = preds / counts\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "n_ests: 57\n",
- "Train AUC: 0.6658850266643547\n",
- "Test AUC: 0.566714711139625\n"
- ]
- }
- ],
- "source": [
- "srfc = Incremental(StreamingRFC(n_estimators_per_chunk=1,\n",
- " max_n_estimators=np.inf))\n",
- "\n",
- "srfc.fit(x_train, y_train,\n",
- " classes=[0, 1])\n",
- "\n",
- "tr_score, te_score = score(srfc, \n",
- " train=(x_train, y_train),\n",
- " test=(x_test, y_test),\n",
- " pr=True)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "#### Incremental forest\n",
- "20 estimators per subset (different features), 10 % per chunk, 1 pass through data."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 12,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "n_ests: 57\n",
- "Train AUC: 0.8403617676637958\n",
- "Test AUC: 0.6507955222895951\n"
- ]
- }
- ],
- "source": [
- "srfc = Incremental(StreamingRFC(n_estimators_per_chunk=20,\n",
- " max_n_estimators=np.inf))\n",
- "\n",
- "srfc.fit(x_train, y_train,\n",
- " classes=[0, 1])\n",
- "\n",
- "tr_score, te_score = score(srfc, \n",
- " train=(x_train, y_train),\n",
- " test=(x_test, y_test),\n",
- " pr=True)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "#### Forest of partial decision trees\n",
- "1 estimator per subset with all features, 10 % per chunk, 1 pass through data."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 13,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "n_ests: 57\n",
- "Train AUC: 0.6702321483770426\n",
- "Test AUC: 0.5732644847212355\n"
- ]
- }
- ],
- "source": [
- "srfc = Incremental(StreamingRFC(n_estimators_per_chunk=1,\n",
- " max_n_estimators=np.max(steps),\n",
- " max_features=x.shape[1]))\n",
- "\n",
- "srfc.fit(x_train, y_train,\n",
- " classes=[0, 1])\n",
- "\n",
- "tr_score, te_score = score(srfc, \n",
- " train=(x_train, y_train),\n",
- " test=(x_test, y_test),\n",
- " pr=True)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "#### Forest of partial decision trees\n",
- "20 estimator per subset with all features, 10 % per chunk, 1 pass through data.\n",
- "\n",
- "Extra estimators shouldn't help here?"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 14,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "n_ests: 57\n",
- "Train AUC: 0.7542754850739607\n",
- "Test AUC: 0.6273808721369764\n"
- ]
- }
- ],
- "source": [
- "srfc = Incremental(StreamingRFC(n_estimators_per_chunk=20,\n",
- " max_n_estimators=np.max(steps),\n",
- " max_features=x.shape[1]))\n",
- "\n",
- "srfc.fit(x_train, y_train,\n",
- " classes=[0, 1])\n",
- "\n",
- "tr_score, te_score = score(srfc, \n",
- " train=(x_train, y_train),\n",
- " test=(x_test, y_test),\n",
- " pr=True)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### n estimators per chunk vs performance\n",
- "\n",
- "Effect of increasing estimators per subset (with different set ups)\n",
- "\n",
- "Function here add Incremental to supplied model, and uses .fit to refit the full model in each iteration.\n",
- "\n",
- "The other functions (above and in PerformanceComparisons.ipynb) do incremental fits using warm start (either directly or via .partial_fit). \n",
- "\n",
- "This means the timing information cannot be directly compared!"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 15,
- "metadata": {},
- "outputs": [],
- "source": [
- "def multiple_dask_fit(x: np.ndarray, y:np.ndarray,\n",
- " steps=np.arange(1, 101, 2),\n",
- " **kwargs) -> None:\n",
- " \n",
- " \"\"\"\n",
- " Fit increasing number of estimators using .partial_fit on a subsample of the training data.\n",
- " Uses Dask by adding Incremental to model and calling fit. This refits the whole model one each\n",
- " iteration, so will be slower than the other test functions. Timing this function can only be compared\n",
- " to other calls of this function.\n",
- " \n",
- " The data passed to the Random forest fit by partial_fit is handled by dask and is sequential batches\n",
- " of data, rather than random samples (as used by inc_partial_fit in PerformanceComparisons.ipynb).\n",
- " \n",
- " StreamingRFC.n_estimators: Number of estimators that will be fit in each step. Set from first\n",
- " difference in range (ie. range[1]-range[0])\n",
- " StreamingRFC.max_n_estimators: Limit on number of estimators than will be fit in model. Should >\n",
- " range[-1].\n",
- " \n",
- " :param steps: Range to iterate over. Sets total number of estimators that will be fit in model\n",
- " after each iteration. Should be range with constant step size.\n",
- " \"\"\"\n",
- " \n",
- " \n",
- " x_train, x_test, y_train, y_test = dask_tts(x, y, \n",
- " test_size=0.25)\n",
- " \n",
- " n_train = x_train.shape[0]\n",
- " \n",
- " train_scores = []\n",
- " test_scores = []\n",
- " for s in steps:\n",
- " \n",
- " # Create fresh model each iteration\n",
- " srfc_ = StreamingRFC(n_estimators_per_chunk=s,\n",
- " max_n_estimators=np.inf,\n",
- " **kwargs)\n",
- " \n",
- " \n",
- " # Add Incremental\n",
- " srfc_ = Incremental(srfc_)\n",
- " \n",
- " # Fit model with these n ests\n",
- " # From scratch each time\n",
- " srfc_.fit(x_train, y_train,\n",
- " classes=[0, 1])\n",
- " \n",
- " tr_score, te_score = score(srfc_,\n",
- " train=(x_train, y_train),\n",
- " test=(x_test, y_test),\n",
- " pr=False)\n",
- " train_scores.append(tr_score)\n",
- " test_scores.append(te_score)\n",
- " \n",
- " return srfc_, train_scores, test_scores"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "#### Incremental forest\n",
- "*range* estimators per subset (different features), 10 % per chunk, 1 pass through data."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 16,
- "metadata": {},
- "outputs": [
- {
- "ename": "RuntimeError",
- "evalue": "Cannot clone object StreamingRFC(bootstrap=True, class_weight=None, criterion='gini',\n max_depth=None, max_features='auto', max_leaf_nodes=None,\n max_n_estimators=inf, min_impurity_decrease=0.0,\n min_impurity_split=None, min_samples_leaf=1, min_samples_split=2,\n min_weight_fraction_leaf=0.0, n_estimators=1,\n n_estimators_per_chunk=1, n_jobs=None, oob_score=False,\n random_state=None, verbose=0, warm_start=True), as the constructor either does not set or modifies parameter n_estimators",
- "output_type": "error",
- "traceback": [
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
- "\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)",
- "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n",
- "\u001b[0;32m\u001b[0m in \u001b[0;36mmultiple_dask_fit\u001b[0;34m(x, y, steps, **kwargs)\u001b[0m\n\u001b[1;32m 43\u001b[0m \u001b[0;31m# From scratch each time\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 44\u001b[0m srfc_.fit(x_train, y_train,\n\u001b[0;32m---> 45\u001b[0;31m classes=[0, 1])\n\u001b[0m\u001b[1;32m 46\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 47\u001b[0m tr_score, te_score = score(srfc_,\n",
- "\u001b[0;32m/mnt/s/OneDrive/Matlab/dask tests/IncrementalTrees/pc_env_linux/lib/python3.6/site-packages/dask_ml/wrappers.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, X, y, **fit_kwargs)\u001b[0m\n\u001b[1;32m 461\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 462\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mfit_kwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 463\u001b[0;31m \u001b[0mestimator\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msklearn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbase\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclone\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mestimator\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 464\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_fit_for_estimator\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mestimator\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mfit_kwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 465\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;32m/mnt/s/OneDrive/Matlab/dask tests/IncrementalTrees/pc_env_linux/lib/python3.6/site-packages/sklearn/base.py\u001b[0m in \u001b[0;36mclone\u001b[0;34m(estimator, safe)\u001b[0m\n\u001b[1;32m 71\u001b[0m raise RuntimeError('Cannot clone object %s, as the constructor '\n\u001b[1;32m 72\u001b[0m \u001b[0;34m'either does not set or modifies parameter %s'\u001b[0m \u001b[0;34m%\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 73\u001b[0;31m (estimator, name))\n\u001b[0m\u001b[1;32m 74\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mnew_object\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 75\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;31mRuntimeError\u001b[0m: Cannot clone object StreamingRFC(bootstrap=True, class_weight=None, criterion='gini',\n max_depth=None, max_features='auto', max_leaf_nodes=None,\n max_n_estimators=inf, min_impurity_decrease=0.0,\n min_impurity_split=None, min_samples_leaf=1, min_samples_split=2,\n min_weight_fraction_leaf=0.0, n_estimators=1,\n n_estimators_per_chunk=1, n_jobs=None, oob_score=False,\n random_state=None, verbose=0, warm_start=True), as the constructor either does not set or modifies parameter n_estimators"
- ]
- },
- {
- "ename": "NameError",
- "evalue": "name 'final_est' is not defined",
- "output_type": "error",
- "traceback": [
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
- "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
- "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mget_ipython\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrun_line_magic\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'time'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'final_est, train_scores, test_scores = multiple_dask_fit(x, y, steps=steps)'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf\"With {len(final_est.estimators_)}: {train_scores[-1]} | {test_scores[-1]}\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5\u001b[0m \u001b[0mplot_auc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msteps\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtrain_scores\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtest_scores\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;31mNameError\u001b[0m: name 'final_est' is not defined"
- ]
- }
- ],
- "source": [
- "steps = np.arange(1, MAX_ESTIMATORS, 6)\n",
- "\n",
- "%time final_est, train_scores, test_scores = multiple_dask_fit(x, y, steps=steps)\n",
- "print(f\"With {len(final_est.estimators_)}: {train_scores[-1]} | {test_scores[-1]}\")\n",
- "plot_auc(steps, train_scores, test_scores)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "#### Forest of partial decision trees\n",
- "*range* estimators per subset with all features, 10 % per chunk, 1 pass through data."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "steps = np.arange(1, MAX_ESTIMATORS, 6)\n",
- "\n",
- "%time final_est, train_scores, test_scores = multiple_dask_fit(x, y, steps=steps, max_features=x.shape[1])\n",
- "print(f\"With {len(final_est.estimators_)}: {train_scores[-1]} | {test_scores[-1]}\")\n",
- "plot_auc(steps, train_scores, test_scores)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.6.7"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/requirements.txt b/requirements.txt
index 0ce12d8..30de622 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,14 +1,8 @@
-scikit-learn>=0.22
-pandas
+scikit-learn==1.2
numpy
-dask>=2
+dask==2022.12
dask-glm==0.2.0
-dask-ml>=1
-distributed>=2
+dask-ml==2022.5.27
+distributed==2022.12
bokeh
-pytest
-jupyter
-jupyterlab
-ipykernel
-matplotlib
fsspec
diff --git a/notes/EquivRows.ipynb b/scripts/EquivRows.ipynb
similarity index 70%
rename from notes/EquivRows.ipynb
rename to scripts/EquivRows.ipynb
index 6252378..4cbf6ba 100644
--- a/notes/EquivRows.ipynb
+++ b/scripts/EquivRows.ipynb
@@ -3,7 +3,11 @@
{
"cell_type": "code",
"execution_count": 1,
- "metadata": {},
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
"outputs": [],
"source": [
"# Change dir to repo root if running from repo (rather than pip installed)\n",
@@ -17,46 +21,48 @@
},
{
"cell_type": "code",
- "execution_count": 5,
- "metadata": {},
+ "execution_count": 4,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
"outputs": [],
"source": [
- "import matplotlib.pyplot as plt\n",
"import numpy as np\n",
- "import math\n",
"\n",
- "from typing import Tuple\n",
"\n",
"from incremental_trees.trees import StreamingRFC\n",
"\n",
- "from sklearn.ensemble import RandomForestClassifier\n",
- "from sklearn.datasets import make_blobs\n",
- "from sklearn.model_selection import train_test_split\n",
"from sklearn.metrics import roc_auc_score\n",
- "from sklearn.ensemble.forest import RandomForestClassifier\n",
+ "from sklearn.ensemble import RandomForestClassifier\n",
"from sklearn.datasets import load_breast_cancer\n",
"from sklearn.model_selection import train_test_split\n",
- "from sklearn.linear_model import LogisticRegression\n",
- "from sklearn.metrics.classification import classification_report\n",
- "from sklearn.base import clone"
+ "from sklearn.metrics import classification_report"
]
},
{
"cell_type": "code",
- "execution_count": 6,
- "metadata": {},
+ "execution_count": 5,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
"outputs": [],
"source": [
"x, y = load_breast_cancer(return_X_y=True)\n",
- "x_train, x_test, y_train, y_test = train_test_split(x, y,\n",
- " test_size=0.25,\n",
- " random_state=123)"
+ "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=123)"
]
},
{
"cell_type": "code",
- "execution_count": 24,
- "metadata": {},
+ "execution_count": 6,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
"outputs": [],
"source": [
"def fit_srfc(srfc, x, y,\n",
@@ -123,7 +129,11 @@
},
{
"cell_type": "markdown",
- "metadata": {},
+ "metadata": {
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
"source": [
"# 10 full trees vs equivilents\n",
"RFC: 10 tress with 100%\n",
@@ -134,8 +144,12 @@
},
{
"cell_type": "code",
- "execution_count": 32,
- "metadata": {},
+ "execution_count": 7,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
"outputs": [
{
"name": "stdout",
@@ -143,15 +157,15 @@
"text": [
" precision recall f1-score support\n",
"\n",
- " 0 1.00 0.98 0.99 54\n",
- " 1 0.99 1.00 0.99 89\n",
+ " 0 0.96 0.98 0.97 54\n",
+ " 1 0.99 0.98 0.98 89\n",
"\n",
- " micro avg 0.99 0.99 0.99 143\n",
- " macro avg 0.99 0.99 0.99 143\n",
- "weighted avg 0.99 0.99 0.99 143\n",
+ " accuracy 0.98 143\n",
+ " macro avg 0.98 0.98 0.98 143\n",
+ "weighted avg 0.98 0.98 0.98 143\n",
"\n",
- "Train AUC: 0.9998583034196108\n",
- "Test AUC: 0.9886600083229296\n"
+ "Train AUC: 0.9999763839032684\n",
+ "Test AUC: 0.9888680815647108\n"
]
}
],
@@ -163,8 +177,12 @@
},
{
"cell_type": "code",
- "execution_count": 33,
- "metadata": {},
+ "execution_count": 8,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
"outputs": [
{
"name": "stdout",
@@ -172,15 +190,15 @@
"text": [
" precision recall f1-score support\n",
"\n",
- " 0 0.91 0.91 0.91 54\n",
- " 1 0.94 0.94 0.94 89\n",
+ " 0 0.98 0.94 0.96 54\n",
+ " 1 0.97 0.99 0.98 89\n",
"\n",
- " micro avg 0.93 0.93 0.93 143\n",
- " macro avg 0.93 0.93 0.93 143\n",
- "weighted avg 0.93 0.93 0.93 143\n",
+ " accuracy 0.97 143\n",
+ " macro avg 0.97 0.97 0.97 143\n",
+ "weighted avg 0.97 0.97 0.97 143\n",
"\n",
- "Train AUC: 0.9814495560173814\n",
- "Test AUC: 0.9889721181856014\n"
+ "Train AUC: 0.9830554505951257\n",
+ "Test AUC: 0.987411568872243\n"
]
}
],
@@ -196,7 +214,11 @@
},
{
"cell_type": "markdown",
- "metadata": {},
+ "metadata": {
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
"source": [
"## vs SRFC: 100 x 1 x 0.1 vs 10\n",
"100 fits with 1 tree on 10% of data each"
@@ -204,8 +226,12 @@
},
{
"cell_type": "code",
- "execution_count": 34,
- "metadata": {},
+ "execution_count": 9,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
"outputs": [
{
"name": "stdout",
@@ -213,15 +239,15 @@
"text": [
" precision recall f1-score support\n",
"\n",
- " 0 0.98 0.93 0.95 54\n",
- " 1 0.96 0.99 0.97 89\n",
+ " 0 0.98 0.96 0.97 54\n",
+ " 1 0.98 0.99 0.98 89\n",
"\n",
- " micro avg 0.97 0.97 0.97 143\n",
- " macro avg 0.97 0.96 0.96 143\n",
- "weighted avg 0.97 0.97 0.96 143\n",
+ " accuracy 0.98 143\n",
+ " macro avg 0.98 0.98 0.98 143\n",
+ "weighted avg 0.98 0.98 0.98 143\n",
"\n",
- "Train AUC: 0.9757580767050822\n",
- "Test AUC: 0.9916770703287556\n"
+ "Train AUC: 0.975769884753448\n",
+ "Test AUC: 0.9938618393674573\n"
]
}
],
@@ -237,7 +263,11 @@
},
{
"cell_type": "markdown",
- "metadata": {},
+ "metadata": {
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
"source": [
"## vs SRFC: 100 x 10 x 0.1 vs 10\n",
"100 fits with 1 tree on 10% of data each"
@@ -245,8 +275,12 @@
},
{
"cell_type": "code",
- "execution_count": 35,
- "metadata": {},
+ "execution_count": 10,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
"outputs": [
{
"name": "stdout",
@@ -254,15 +288,15 @@
"text": [
" precision recall f1-score support\n",
"\n",
- " 0 0.98 0.83 0.90 54\n",
- " 1 0.91 0.99 0.95 89\n",
+ " 0 1.00 0.93 0.96 54\n",
+ " 1 0.96 1.00 0.98 89\n",
"\n",
- " micro avg 0.93 0.93 0.93 143\n",
- " macro avg 0.94 0.91 0.92 143\n",
- "weighted avg 0.93 0.93 0.93 143\n",
+ " accuracy 0.97 143\n",
+ " macro avg 0.98 0.96 0.97 143\n",
+ "weighted avg 0.97 0.97 0.97 143\n",
"\n",
- "Train AUC: 0.967846684300019\n",
- "Test AUC: 0.9850187265917603\n"
+ "Train AUC: 0.990459096920461\n",
+ "Test AUC: 0.985538909696213\n"
]
}
],
@@ -278,7 +312,11 @@
},
{
"cell_type": "markdown",
- "metadata": {},
+ "metadata": {
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
"source": [
"## vs SRFC: 100 x 1 x 0.1 vs 10 (all features per tree)\n",
"100 fits with 1 tree on 10% of data each"
@@ -286,8 +324,12 @@
},
{
"cell_type": "code",
- "execution_count": 49,
- "metadata": {},
+ "execution_count": 11,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
"outputs": [
{
"name": "stdout",
@@ -295,15 +337,15 @@
"text": [
" precision recall f1-score support\n",
"\n",
- " 0 0.98 0.93 0.95 54\n",
- " 1 0.96 0.99 0.97 89\n",
+ " 0 1.00 0.91 0.95 54\n",
+ " 1 0.95 1.00 0.97 89\n",
"\n",
- " micro avg 0.97 0.97 0.97 143\n",
- " macro avg 0.97 0.96 0.96 143\n",
+ " accuracy 0.97 143\n",
+ " macro avg 0.97 0.95 0.96 143\n",
"weighted avg 0.97 0.97 0.96 143\n",
"\n",
- "Train AUC: 0.9817801813716229\n",
- "Test AUC: 0.9847066167290888\n"
+ "Train AUC: 0.9882155677309654\n",
+ "Test AUC: 0.987411568872243\n"
]
}
],
@@ -320,7 +362,11 @@
},
{
"cell_type": "markdown",
- "metadata": {},
+ "metadata": {
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
"source": [
"## vs SRFC: 33 x 3 x 0.1 vs 10 (sampled features per tree)\n",
"100 fits with 1 tree on 10% of data each"
@@ -328,8 +374,12 @@
},
{
"cell_type": "code",
- "execution_count": 48,
- "metadata": {},
+ "execution_count": 12,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
"outputs": [
{
"name": "stdout",
@@ -337,15 +387,15 @@
"text": [
" precision recall f1-score support\n",
"\n",
- " 0 0.98 0.98 0.98 54\n",
- " 1 0.99 0.99 0.99 89\n",
+ " 0 1.00 0.94 0.97 54\n",
+ " 1 0.97 1.00 0.98 89\n",
"\n",
- " micro avg 0.99 0.99 0.99 143\n",
- " macro avg 0.99 0.99 0.99 143\n",
- "weighted avg 0.99 0.99 0.99 143\n",
+ " accuracy 0.98 143\n",
+ " macro avg 0.98 0.97 0.98 143\n",
+ "weighted avg 0.98 0.98 0.98 143\n",
"\n",
- "Train AUC: 0.9844606083506519\n",
- "Test AUC: 0.9887640449438202\n"
+ "Train AUC: 0.9930096353674664\n",
+ "Test AUC: 0.9985434873075323\n"
]
}
],
@@ -362,7 +412,11 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {},
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
"outputs": [],
"source": []
}
@@ -388,4 +442,4 @@
},
"nbformat": 4,
"nbformat_minor": 2
-}
+}
\ No newline at end of file
diff --git a/scripts/InconsistentClasses.ipynb b/scripts/InconsistentClasses.ipynb
new file mode 100644
index 0000000..cf3af75
--- /dev/null
+++ b/scripts/InconsistentClasses.ipynb
@@ -0,0 +1,141 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n",
+ "is_executing": true
+ }
+ },
+ "outputs": [],
+ "source": [
+ "# Change dir to repo root if running from repo (rather than pip installed)\n",
+ "# (Assuming running from [repo]/scripts/)\n",
+ "import os\n",
+ "os.chdir('../')\n",
+ "\n",
+ "%load_ext autoreload\n",
+ "%autoreload 2"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n",
+ "is_executing": true
+ }
+ },
+ "outputs": [],
+ "source": [
+ "import pandas as pd"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n",
+ "is_executing": true
+ }
+ },
+ "outputs": [],
+ "source": [
+ "data= pd.DataFrame({'a': (1, 2, 3, 4, 5), \n",
+ " 'b': (1, 2, 3, 4, 5),\n",
+ " 'c': (1, 2, 3, 4, 5),\n",
+ " 'target': (1, 1, 2, 2, 3)})\n",
+ "\n",
+ "data = pd.concat((data, data), axis=0).sort_values('target').reset_index(drop=True)\n",
+ "\n",
+ "x = data[[c for c in data if c != 'target']]\n",
+ "y = data['target']\n",
+ "\n",
+ "data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n",
+ "is_executing": true
+ }
+ },
+ "outputs": [],
+ "source": [
+ "from incremental_trees.trees import StreamingRFC\n",
+ "\n",
+ "srfc = StreamingRFC()\n",
+ "srfc.partial_fit(x[0:3], y[0:3], # No 3s\n",
+ " classes=y.unique())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n",
+ "is_executing": true
+ }
+ },
+ "outputs": [],
+ "source": [
+ "srfc.partial_fit(x, y)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n",
+ "is_executing": true
+ }
+ },
+ "outputs": [],
+ "source": [
+ "srfc.predict(x)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "outputs": [],
+ "source": [],
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ }
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "name": "python3",
+ "language": "python",
+ "display_name": "Python 3 (ipykernel)"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.7.0"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
\ No newline at end of file
diff --git a/scripts/PerformanceComparisons.ipynb b/scripts/PerformanceComparisons.ipynb
new file mode 100644
index 0000000..f54130d
--- /dev/null
+++ b/scripts/PerformanceComparisons.ipynb
@@ -0,0 +1,985 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "# Performance comparison\n",
+ "\n",
+ "In memory, no dask."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "# Change dir to repo root if running from repo (rather than pip installed)\n",
+ "# (Assuming running from [repo]/scripts/)\n",
+ "import os\n",
+ "os.chdir('../')\n",
+ "\n",
+ "%load_ext autoreload\n",
+ "%autoreload 2"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "import matplotlib.pyplot as plt\n",
+ "import numpy as np\n",
+ "\n",
+ "from typing import Tuple\n",
+ "\n",
+ "from incremental_trees.trees import StreamingRFC\n",
+ "\n",
+ "from sklearn.ensemble import RandomForestClassifier\n",
+ "from sklearn.datasets import make_blobs\n",
+ "from sklearn.model_selection import train_test_split\n",
+ "from sklearn.metrics import roc_auc_score"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "# Settings\n",
+ "MAX_ESTIMATORS = 120 # Lower to run faster"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "# Synthetic data\n",
+ "\n",
+ "20000 samples, 2 classes, 40 features."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "x, y = make_blobs(\n",
+ " n_samples=20000,\n",
+ " centers=2,\n",
+ " cluster_std=100,\n",
+ " n_features=40,\n",
+ " random_state=0\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "## Default params"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "### Standard random forest"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "def score(mod,\n",
+ " train: Tuple[np.array, np.array],\n",
+ " test: Tuple[np.array, np.array],\n",
+ " pr=False) -> Tuple[float, float]:\n",
+ " \"\"\"\n",
+ " Return ROC auc on x_train and x_test (from caller) on mod. Print if requested.\n",
+ " \"\"\"\n",
+ " y_pred_train_proba = mod.predict_proba(train[0])[:, 1]\n",
+ " y_pred_test_proba = mod.predict_proba(test[0])[:, 1]\n",
+ "\n",
+ " roc_train = roc_auc_score(train[1], y_pred_train_proba)\n",
+ " roc_test = roc_auc_score(test[1], y_pred_test_proba)\n",
+ " if pr:\n",
+ " print(f\"n_ests: {len(rfc.estimators_)}\")\n",
+ " print(f'Train AUC: {roc_train}')\n",
+ " print(f'Test AUC: {roc_test}')\n",
+ "\n",
+ " return roc_train, roc_test\n",
+ "\n",
+ "\n",
+ "def inc_fit(x: np.array, y: np.array, rfc=None, steps=np.arange(1, 101, 2),\n",
+ " sample: int = 1):\n",
+ " \"\"\"\n",
+ " Fit a random forest model with an increasing number of estimators.\n",
+ " \n",
+ " Uses .fit with warm_start=True.\n",
+ " \n",
+ " :param rfc: RFC model to test. Default = None (use example with default RFC params).\n",
+ " If model is supplied, the .n_estimators param will be ignored and managed here.\n",
+ " :param steps: Range to iterate over. Sets total number of estimators that will be fit in model\n",
+ " after each iteration. Should be range with constant step size.\n",
+ " :param sample: Proportion of randomly sampled training data to use on each partial_fit call.\n",
+ " If sample = 1, all training data is used on each iteration,\n",
+ " so should behave as standard random forest. Default = 1 (100%).\n",
+ " \"\"\"\n",
+ "\n",
+ " x_train, x_test, y_train, y_test = train_test_split(\n",
+ " x,\n",
+ " y,\n",
+ " test_size=0.25,\n",
+ " random_state=1\n",
+ " )\n",
+ "\n",
+ " if rfc is None:\n",
+ " rfc = RandomForestClassifier(warm_start=True)\n",
+ "\n",
+ " train_scores = []\n",
+ " test_scores = []\n",
+ " for s in steps:\n",
+ " # Fit model with these n ests\n",
+ " rfc.set_params(n_estimators=s)\n",
+ " rfc.fit(x_train, y_train)\n",
+ "\n",
+ " tr_score, te_score = score(\n",
+ " rfc,\n",
+ " train=(x_train, y_train),\n",
+ " test=(x_test, y_test),\n",
+ " pr=False\n",
+ " )\n",
+ " train_scores.append(tr_score)\n",
+ " test_scores.append(te_score)\n",
+ "\n",
+ " return train_scores, test_scores\n",
+ "\n",
+ "\n",
+ "def plot_auc(steps, train_scores, test_scores):\n",
+ " \"\"\"\n",
+ " Plot the train and test auc scores vs total number of model estimators\n",
+ " \"\"\"\n",
+ "\n",
+ " plt.figure(figsize=(4, 4))\n",
+ " plt.plot(steps, train_scores)\n",
+ " plt.plot(steps, test_scores)\n",
+ " plt.xlabel('n_estimators')\n",
+ " plt.ylabel('auc')\n",
+ " plt.legend(['train', 'test'])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "CPU times: total: 23.8 s\n",
+ "Wall time: 24.1 s\n",
+ "With 119: 1.0 | 0.6391245961589661\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": "",
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX4AAAFzCAYAAAA5RGIiAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjYuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8o6BhiAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA1KElEQVR4nO3de1xUZf4H8M/MwAzXAbnITQQVvOMNhNDarCjSorRtNbVUdq211VJZNzXN6yZtltmau9ZvvbRtpVZ2N9MwM43FK5Sp4A1xlYtIMoDAwMzz++PA2AgoDANnhvm8X6/zYubMOTPfQ/aZh+c85zkKIYQAERE5DKXcBRARUfti8BMRORgGPxGRg2HwExE5GAY/EZGDYfATETkYBj8RkYNh8BMRORgnuQtob0ajEZcuXYKnpycUCoXc5RARtZoQAmVlZQgODoZSeev2vMMF/6VLlxAaGip3GUREVnfhwgV06dLllts5XPB7enoCkH5BWq1W5mqIiFpPp9MhNDTUlG+34nDBX9+9o9VqGfxE1KE0t/uaJ3eJiBwMg5+IyMEw+ImIHAyDn4jIwTD4iYgcDIOfiMjBMPiJiByMrMG/d+9eJCUlITg4GAqFAp988skt99mzZw+GDBkCjUaDiIgIbNq0qc3rJCLqSGQN/oqKCgwcOBBr165t1vbnzp3DAw88gLvuuguZmZmYNWsWpk6diq+//rqNKyUi6jhkvXJ35MiRGDlyZLO3X7duHbp164ZXX30VANCnTx/s27cPr732GhITE9uqzDZXVWNAka4av1zTo7rWiOpaA6prjKiuNaLWaEStQcBgFKg1ChiMRhiMAgKAEIBRCNP7CAFIr0iPb6UZmxCRDB4eFIwgL9c2e3+7mrIhPT0dCQkJZusSExMxa9asJveprq5GdXW16blOp2ur8prl0tVKfPHjJXx/qhj5pVUo0lVBV1Ura01EZFtiwjox+OsVFBQgICDAbF1AQAB0Oh0qKyvh6trwF5WamoqlS5e2V4mNKqnQY/tP+fgs8xIO5JY0uo2LsxI+bmq4qFVQq5TQOKugcVLCWaWAk1IJJ6UCSqUCKoUCKqUCUABKhQIKAArpKRR1zwEACuBXz4jIjvh6aNr0/e0q+C0xf/58pKSkmJ7Xz2LXXnafLMSf3j2CqhqjaV1sNx88OCAIEf4e6KzVwN/TBVoXJ94fgIjahV0Ff2BgIAoLC83WFRYWQqvVNtraBwCNRgONpm2/PZty+HyJKfR7B3rikSEheHBAMIK92+5POCKiW7Gr4I+Pj8f27dvN1u3atQvx8fEyVdS0nMIy/H7TIVTVGHFXL3+8NSkGzipeNkFE8pM1icrLy5GZmYnMzEwA0nDNzMxM5OXlAZC6aSZNmmTaftq0aTh79iyee+45nDx5Ev/4xz+wdetWzJ49W47ym3TxaiUmrT+A0soaDO7qjbUThzD0ichmyJpGhw4dwuDBgzF48GAAQEpKCgYPHoxFixYBAPLz801fAgDQrVs3fPnll9i1axcGDhyIV199Ff/6179saijnLxV6TFqfgQJdFSI6e2DD5KFwU9vVH1ZE1MEphGjOiO+OQ6fTwcvLC6WlpVa/A5fBKDD2zXQcPv8Lgrxc8NHTw9ifT0RtrqW5xv4HKzqa9wsOn/8F7moV3vlDLEOfiGwSg9+K9p++AgAY0aszIjo376bHRETtjcFvRftPFwMAhkf4yVwJEVHTGPxWUlFdiyN5vwAAbmfwE5ENY/BbyYFzJag1CoT6uKKrr5vc5RARNYnBbyX76rp52NonIlvH4LeS+v79YT0Y/ERk2xj8VnC5rBonC8oAAMN6+MpcDRHRzTH4reCHM1Jrv2+Qts2nUyUiai0GvxXUd/PcHsluHiKyfQz+VhJCYN8pjt8nIvvB4G+l3CvXcKm0Cs4qBYaGd5K7HCKiW2Lwt1L9MM4hXTtxFk4isgsM/lb6geP3icjOMPhbwWAU+OGMNDHbcJ7YJSI7
weBvhZ8vlaK0sgaeGicMCPGSuxwiomZh8LdCff9+XHdfOPHWikRkJ5hWrWAavx/Bq3WJyH4w+FvhZL40TUNMuI/MlRARNR+D30JCCOiqagAAPu5qmashImo+Br+FqmqMqDFI96nXujrLXA0RUfMx+C1UVtfaVyoAd7VK5mqIiJqPwW+h+m4eTxdnKBQKmashImo+Br+FdFW1AABPF07TQET2hcFvIV2l1OLXurB/n4jsC4PfQvUtfq0rW/xEZF8Y/BYq+1UfPxGRPWHwW0hXWdfiZ/ATkZ1h8FuovsXPrh4isjcMfgvp2NVDRHaKwW+h6109bPETkX1h8FvI1NXDFj8R2RkGv4U4nJOI7BWD30K8gIuI7BWD30JlpikbGPxEZF8Y/BbScTgnEdkpBr8Fag1GXNMbALCrh4jsD4PfAvXdPADgweGcRGRnGPwWqO/mcVOr4Kzir5CI7AtTywJlnIufiOwYg98CHMpJRPaMwW+B6yN6GPxEZH8Y/BbgbReJyJ4x+C3Arh4ismcMfguUcZ4eIrJjDH4LcC5+IrJnDH4L8LaLRGTPGPwWuH6jdXb1EJH9kT34165di/DwcLi4uCAuLg4HDhxoctuamhosW7YMPXr0gIuLCwYOHIgdO3a0Y7USDuckInsma/Bv2bIFKSkpWLx4MY4cOYKBAwciMTERRUVFjW6/cOFCvPnmm1izZg2OHz+OadOmYcyYMTh69Gi71m06ucsWPxHZIVmDf9WqVXjyySeRnJyMvn37Yt26dXBzc8OGDRsa3f6dd97B888/j1GjRqF79+54+umnMWrUKLz66qvtWjdP7hKRPZMt+PV6PQ4fPoyEhITrxSiVSEhIQHp6eqP7VFdXw8XFxWydq6sr9u3b1+TnVFdXQ6fTmS2tVX9y14vDOYnIDskW/MXFxTAYDAgICDBbHxAQgIKCgkb3SUxMxKpVq3Dq1CkYjUbs2rUL27ZtQ35+fpOfk5qaCi8vL9MSGhraqrqFELzROhHZNdlP7rbE66+/jsjISPTu3RtqtRozZsxAcnIylMqmD2P+/PkoLS01LRcuXGhVDRV6A4xCesyuHiKyR7IFv5+fH1QqFQoLC83WFxYWIjAwsNF9/P398cknn6CiogLnz5/HyZMn4eHhge7duzf5ORqNBlqt1mxpjfrpGpxVCrg429X3JhERABmDX61WIzo6GmlpaaZ1RqMRaWlpiI+Pv+m+Li4uCAkJQW1tLT766CM8/PDDbV2uya9vsq5QKNrtc4mIrEXWs5MpKSmYPHkyYmJiEBsbi9WrV6OiogLJyckAgEmTJiEkJASpqakAgIyMDFy8eBGDBg3CxYsXsWTJEhiNRjz33HPtVrNpDD+HchKRnZI1vcaNG4fLly9j0aJFKCgowKBBg7Bjxw7TCd+8vDyz/vuqqiosXLgQZ8+ehYeHB0aNGoV33nkH3t7e7VZzGS/eIiI7pxBCCLmLaE86nQ5eXl4oLS21qL//k6MXMWtLJoZH+OLdqbe1QYVERC3T0lzj2ckW0nEoJxHZOQZ/C12froHBT0T2icHfQvXDOTkzJxHZKwZ/C+lMd99ii5+I7BODv4V0nIufiOwcg7+FeKN1IrJ3DP4WKmNXDxHZOQZ/C7Grh4jsHYO/hXijdSKydwz+Fro+ZQNb/ERknxj8LVBda0B1rREA5+InIvvF4G+B+hO7CgXgqWGLn4jsE4O/BeqHcnqonaBUci5+IrJPDP4W4FW7RNQRMPhboIxDOYmoA2DwtwCHchJRR8DgbwEO5SSijoDB3wK8CQsRdQQM/hao7+phHz8R2TMGfwvwRutE1BEw+FugfjgnW/xEZM8Y/C3AufiJqCNg8LcA5+Inoo6Awd8CnIufiDoCBn8LmFr87OohIjvG4G8BUx8/u3qIyI4x+JvJYBQoq+aoHiKyfwz+ZiqvC32AwU9E9o3B30z13TwaJyU0TiqZqyEishyDv5k4lJOIOgoGfzNdn6CN3TxEZN8Y/M1U39XD
m6wTkb1j8DcTu3qIqKNg8DcTu3qIqKNg8DfT9bn42eInIvvG4G8m3naRiDoKBn8z8baLRNRRMPib6foEbWzxE5F9Y4o10/AIP3honNAzwFPuUoiIWkUhhBByF9GedDodvLy8UFpaCq1WK3c5RESt1tJcY1cPEZGDYfATETkYBj8RkYNh8BMRORgGPxGRg2HwExE5GAY/EZGDYfATETkY2YN/7dq1CA8Ph4uLC+Li4nDgwIGbbr969Wr06tULrq6uCA0NxezZs1FVVdVO1RIR2T9Zg3/Lli1ISUnB4sWLceTIEQwcOBCJiYkoKipqdPv33nsP8+bNw+LFi3HixAmsX78eW7ZswfPPP9/OlRMR2S9Zg3/VqlV48sknkZycjL59+2LdunVwc3PDhg0bGt3+hx9+wPDhwzFhwgSEh4fjvvvuw/jx42/5VwIREV0nW/Dr9XocPnwYCQkJ14tRKpGQkID09PRG9xk2bBgOHz5sCvqzZ89i+/btGDVqVJOfU11dDZ1OZ7YQETky2WbnLC4uhsFgQEBAgNn6gIAAnDx5stF9JkyYgOLiYtx+++0QQqC2thbTpk27aVdPamoqli5datXaiYjsmewnd1tiz549WLFiBf7xj3/gyJEj2LZtG7788kssX768yX3mz5+P0tJS03LhwoV2rJiIyPbI1uL38/ODSqVCYWGh2frCwkIEBgY2us8LL7yAJ554AlOnTgUAREVFoaKiAk899RQWLFgApbLh95hGo4FGo7H+ARAR2SnZWvxqtRrR0dFIS0szrTMajUhLS0N8fHyj+1y7dq1BuKtUKgCAg91WgIjIYrLegSslJQWTJ09GTEwMYmNjsXr1alRUVCA5ORkAMGnSJISEhCA1NRUAkJSUhFWrVmHw4MGIi4vD6dOn8cILLyApKcn0BUBERDcna/CPGzcOly9fxqJFi1BQUIBBgwZhx44dphO+eXl5Zi38hQsXQqFQYOHChbh48SL8/f2RlJSEF198Ua5DICKyO7z1IhGRneOtF4mI6KYY/EREDobBT0TkYBj8REQOhsFPRORgLAr+0tJSlJSUNFhfUlLCSdCIiGycRcH/2GOPYfPmzQ3Wb926FY899liriyIiorZjUfBnZGTgrrvuarB+xIgRyMjIaHVRRETUdiwK/urqatTW1jZYX1NTg8rKylYXRUREbcei4I+NjcVbb73VYP26desQHR3d6qKIiKjtWDRXz1//+lckJCQgKysL99xzDwAgLS0NBw8exM6dO61aIBERWZdFLf7hw4cjPT0doaGh2Lp1Kz7//HNERETgxx9/xB133GHtGomIyIo4SRsRkZ1raa5Z1NWTl5d309e7du1qydsSEVE7sCj4w8PDoVAomnzdYDBYXBAREbUti4L/6NGjZs9rampw9OhRrFq1ijdFISKycRYF/8CBAxusi4mJQXBwMFauXIlHHnmk1YUREVHbsOokbb169cLBgwet+ZZERGRlFrX4b5yITQiB/Px8LFmyBJGRkVYpjIiI2oZFwe/t7d3g5K4QAqGhoY1O3kZERLbDouD/9ttvzZ4rlUr4+/sjIiICTk4WvSUREbWTVl3Adfz4ceTl5UGv15utf+ihh1pdWFvhBVxE1NG0ywVcZ8+exSOPPIIff/wRCoUC9d8d9d0/HMdPRGS7LBrVM3PmTISHh6OoqAhubm44duwY9u7di5iYGOzZs8fKJRIRkTVZ1OJPT0/H7t274efnB6VSCZVKhdtvvx2pqal49tlnG1zgRUREtsOiFr/BYICnpycAwM/PD5cuXQIAhIWFITs723rVERGR1VnU4u/fvz+ysrLQrVs3xMXF4eWXX4ZarcZbb72F7t27W7tGIiKyIouCf+HChaioqAAALFu2DA8++CDuuOMO+Pr6YsuWLVYtkIiIrMtq8/GXlJSgU6dON5210xZwOCcRdTTtMpyzMT4+PtZ6KyIiakNWnaSNiIhsH4OfiMjBMPiJiBwMg5+IyMEw+ImIHAyD
n4jIwTD4iYgcDIOfiMjBMPiJiBwMg5+IyMEw+ImIHAyDn4jIwTD4iYgcDIOfiMjBMPiJiBwMg5+IyMEw+ImIHAyDn4jIwTD4iYgcjE0E/9q1axEeHg4XFxfExcXhwIEDTW47YsQIKBSKBssDDzzQjhUTEdkv2YN/y5YtSElJweLFi3HkyBEMHDgQiYmJKCoqanT7bdu2IT8/37QcO3YMKpUKv/vd79q5ciIi+yR78K9atQpPPvkkkpOT0bdvX6xbtw5ubm7YsGFDo9v7+PggMDDQtOzatQtubm4MfiKiZpI1+PV6PQ4fPoyEhATTOqVSiYSEBKSnpzfrPdavX4/HHnsM7u7ujb5eXV0NnU5nthAROTJZg7+4uBgGgwEBAQFm6wMCAlBQUHDL/Q8cOIBjx45h6tSpTW6TmpoKLy8v0xIaGtrquomI7JnsXT2tsX79ekRFRSE2NrbJbebPn4/S0lLTcuHChXaskIjI9jjJ+eF+fn5QqVQoLCw0W19YWIjAwMCb7ltRUYHNmzdj2bJlN91Oo9FAo9G0ulYioo5C1ha/Wq1GdHQ00tLSTOuMRiPS0tIQHx9/030/+OADVFdX4/HHH2/rMomIOhRZW/wAkJKSgsmTJyMmJgaxsbFYvXo1KioqkJycDACYNGkSQkJCkJqaarbf+vXrMXr0aPj6+spRNhGR3ZI9+MeNG4fLly9j0aJFKCgowKBBg7Bjxw7TCd+8vDwoleZ/mGRnZ2Pfvn3YuXOnHCUTEdk1hRBCyF1Ee9LpdPDy8kJpaSm0Wq3c5RARtVpLc82uR/UQEVHLMfiJiBwMg5+IyMEw+ImIHAyDn4jIwTD4iYgcDIOfiMjBMPiJiBwMg5+IyMEw+ImIHAyDn4jIwTD4iYgcDIOfiMjBMPiJiBwMg5+IyMEw+ImIHAyDn4jIwTD4iYgcDIOfiMjBMPiJiBwMg5+IyMEw+ImIHAyDn4jIwTD4iYgcDIOfiMjBMPiJiBwMg5+IyME4yV0AEZHDMdQA1WWAvrxuhaLuh0J67O4HOGna7OMZ/ERErWE0AlVX65ZSoLLucXkRoLsElOXX/SyQXq8uA2orb/6ev98JdI1rs5IZ/ETUsVVckYLYzRdw8aprVdcxGoGyS0DJWWkpKwDKC6XQLi8CKn8B1O6Aa6fri0oNlBdIYV4f7MZay2pTaa7XIwQAIT3+dY1tgMFPRPZHCCmkf8m93oKuqZJ+Vl4FrpwGik8BxTlAZcn1/VQawKOztNRUAiXnbt36bi5nd8DVW/pycfEC3P0BbbC0eAYDnoHSF4eLFtB4AmpPQCVPBDP4iahxNZVAwU/AxSNSa9jFS+p7dveTQs3FW+qHVjlLgapSS0GmUAFKJ0CpqnusatiCrdVLgV1dClTppBZ5RTFw7Urdz2IpyJU3vFd5IVByBrhyFqipaP6xqD2k/nRDNVB6QVrqKZ0A7zDApzvgFQJ4BEjH5xEgBXXNNanlX7/UVgEegXWhHiL9dPcHnNTW+K23CwY/UUckhLQomzlwTwgp3PPSgQsZwMWjQNFxQBisVJACUCilAAcAg94Kb6kCvEOlLyBnV8DJRfqp9pBC3C8S8OsJ+EYAajfpi6y8CKi4LH2BqDSAb3fAq6tsLW+5ONbREtkjo0Hqkrh8Aig6KXVfVF2VWp713Rs1VXXPr11fBwXg0w3w6wX495RC0N3/+miS6nKgWie16i9kSIF4I/fOQMgQwL+3tE/FZanPvOKyVINBL41Qqa0GjDU3OQghfYkYbvgicXaXuj5cvAA3P8Ddt+6nH+DsJu1jNEh96IYaqZ/eNwLw7SG10lvSynZ2BTqFSYuDY/ATtZahRmpJlhVIJ/rK8qUugarSulEcOqk7o771LMT1n4ZqKTQNeim4jTcEoxBSyBqqLavtymlpyf7y1tuq1EDwEGk0SZeh0mNtcPNPNAohBbTRUBfY9Y+N0lL/GEI6YSpjH7ej42+dHJPRWBfIV68HtDBe75NWOl1/rHKu62d2
ksK56ITUDVJ4HCj6Gbh6AabRGG3FyVVqtfv3Afx7SScnnVzqujdcrndzOLlKP51dpS+kK6elvxCKc4DL2dJxajzrTi56ABoPoFM3oGs8EDyodWPHFYq6/n5nqx02tQ0GP3UMRiPwv4PA+f1SoFeXA/oKqXtCX153IrF+qevisGZYK52kE36edYu7H6DRXh/hofGUtvk1hULqZ3ZykbosnFzq+sB/3cIW0glG77Dr/eMt4R0K9LirNUdGHRCDn+xXrR7I3Quc+ALI3i6dsGspJxfp5KCLVgpms66K+u6K2uvrFUqpxd25r7QE9AV8I6W+8+aeSCWSGYOf7IuhVgr7nz4CTnwuDQesp9ECPe6W+qXV7lJXhtr9hq4NT2k7jafUEnd2ke9YiGTC4CfbV1MJXDwMHP8M+PljoKLo+mseAUCvUUCfB4Hw39jVWGoiuTD4yfboLkn99XkZ0jDD/CzzoYKuPkC/0UD/R6WTkuxiIWoRBj/Jx1ArDX0szgEuHZEuGrp4WJoH5UbunYHuI4Co30knKzlyhMhiDH5qO0YDoLsIXM1rZDkPlF5s/MpQhUo6cdo1DgiNA0JjpVEtbTxxFZGjYPCT9V3KBA5vAo59VDds8iaUztKVlEGDpCtEQ6KBwAHSJfZE1CYY/GQdlVeBn7dJgZ+fdX290lkaS+7dVWq1e4cC3uF1z0Olse/soydqVwx+ajkhpO6aCxlA3n+BCwekK1iFUXpdpQb6JAFDJgPht1t24RERtRkGPzXPL7nAue+B3H1A7vdS3/2N/HsDQyYBAx6TJtsiIpvE4CepBV9VKk0GVlZQd0L2AlCaJ/28ctp8/nJA6sIJGiidfK0/CesZKE/9RNQisgf/2rVrsXLlShQUFGDgwIFYs2YNYmNjm9z+6tWrWLBgAbZt24aSkhKEhYVh9erVGDVqVDtWbecMNcDJL4Gj70gTd5UX3Xr2R6WTdOI1/A6g2x1Al1iegCWyU7IG/5YtW5CSkoJ169YhLi4Oq1evRmJiIrKzs9G5c+cG2+v1etx7773o3LkzPvzwQ4SEhOD8+fPw9vZu/+LtUen/pJOvR/7d+Lw2Gq0054xXiHRzCu9QwCtUGnUTPFia/oCI7J5CCNHG88k2LS4uDkOHDsUbb7wBADAajQgNDcUzzzyDefPmNdh+3bp1WLlyJU6ePAlnZ8su4NHpdPDy8kJpaSm0Wm2r6rd5QkgXR53+Bji1Czj33fUTsO6dpf74nvdfvweps6u89RKRRVqaa7K1+PV6PQ4fPoz58+eb1imVSiQkJCA9Pb3RfT777DPEx8dj+vTp+PTTT+Hv748JEyZg7ty5UKkaHzlSXV2N6urr3Rg63S3GlXcE9ePoT6dJ/fS/Fn4HEPN7oPeDnNeGyEHJFvzFxcUwGAwICAgwWx8QEICTJ082us/Zs2exe/duTJw4Edu3b8fp06fxpz/9CTU1NVi8eHGj+6SmpmLp0qVWr98mXTkD7F4uTWRWT6UGwoYDkfcCkYmAX4R89RGRTZD95G5LGI1GdO7cGW+99RZUKhWio6Nx8eJFrFy5ssngnz9/PlJSUkzPdTodQkND26vk9lFWCHz3N+DI29K88VAAUY8CUWOB8OHsmyciM7IFv5+fH1QqFQoLzU8yFhYWIjCw8WGBQUFBcHZ2NuvW6dOnDwoKCqDX66FWN+y60Gg00GhacTs5W2Y0AhnrpFZ+zTVpXeR9wD2LgcD+8tZGRDZLtuBXq9WIjo5GWloaRo8eDUBq0aelpWHGjBmN7jN8+HC89957MBqNUNZd5p+Tk4OgoKBGQ79Du3oB+PRPwLm90vMuQ4GEpVILn8gGCSFQW1sLg6GRifnolm5s9LaGrF09KSkpmDx5MmJiYhAbG4vVq1ejoqICycnJAIBJkyYhJCQEqampAICnn34ab7zxBmbOnIlnnnkGp06dwooVK/Dss8/KeRjtSwjgxy3A9r9IE6A5uwH3LQdi/sDZK8lm6fV65Ofn49q1a3KX
YrcUCgW6dOkCDw+PVr+XrME/btw4XL58GYsWLUJBQQEGDRqEHTt2mE745uXlmVr2ABAaGoqvv/4as2fPxoABAxASEoKZM2di7ty5ch1C+6q8Cnz2DHDiM+l5l6HAmDcB3x6ylkV0M0ajEefOnYNKpUJwcDDUajUUbKS0iBACly9fxv/+9z9ERka2uuUv6zh+OdjtOP7Cn4EtjwMlZ6WraEfMA4bPBlR2dX6eHFBVVRXOnTuHsLAwuLnxam9LVVZWIjc3F926dYOLi/m9ou1mHD+1wI8fSC392krpitqxb0tz1xPZESWn324Va/6VxOC3ZYYaYOdCaeQOAPS4G/jtesDNR966iMiuMfhtUXW5dFOTjLeAwp+kdXfMAe56nnPbE1GrMfhtSf1UCz99COjLpHUaLTBmHdD7ATkrI6JWCg8Px6xZszBr1iy5S2Hw2wQhgE9nAJn/ub7Op7t0B6tBEwEPf/lqI3JgI0aMwKBBg7B69epWv9fBgwfh7m4bV9Ez+G3B/tel0FeogH6jgegp0mRqHPJGZNOEEDAYDHByunWU+vvbTgOOp9nldjoNSKubRG7Uy8CjG4Buv2HoU4clhMA1fa0sS0tGr0+ZMgXfffcdXn/9dSgUCigUCmzatAkKhQJfffUVoqOjodFosG/fPpw5cwYPP/wwAgIC4OHhgaFDh+Kbb74xe7/w8HCzvxwUCgX+9a9/YcyYMXBzc0NkZCQ+++wza/2ab4otfjmVnAM+/L00R/7gJ6Srb4k6uMoaA/ou+lqWzz6+LBFu6ubF3uuvv46cnBz0798fy5YtAwD8/PPPAIB58+bhlVdeQffu3dGpUydcuHABo0aNwosvvgiNRoN///vfSEpKQnZ2Nrp27drkZyxduhQvv/wyVq5ciTVr1mDixIk4f/48fHzaduQeW/xy0VdIF2RVXZVuaTjqFbbyiWyIl5cX1Go13NzcEBgYiMDAQNMVs8uWLcO9996LHj16wMfHBwMHDsQf//hH9O/fH5GRkVi+fDl69Ohxyxb8lClTMH78eERERGDFihUoLy/HgQMH2vzY2OKXgxDSBVmFx6Q7YY37D+Dscuv9iDoAV2cVji9LlO2zrSEmJsbseXl5OZYsWYIvv/wS+fn5qK2tRWVlJfLy8pp4B8mAAQNMj93d3aHValFUVGSVGm+GwS+HA28Bxz6Spl4Y+zagDZa7IqJ2o1Aomt3dYqtuHJ0zZ84c7Nq1C6+88goiIiLg6uqKRx99FHq9/qbvc+MtZBUKBYxGo9XrvZF9//bt0S+5wDdLpMf3vQiEDZOzGiK6CbVa3axppPfv348pU6ZgzJgxAKS/AHJzc9u4Osuxj789CQF8Pku6aUrY7UDsU3JXREQ3ER4ejoyMDOTm5qK4uLjJ1nhkZCS2bduGzMxMZGVlYcKECe3ScrcUg789ZW0Gzn4LqDTAQ38HOGkVkU2bM2cOVCoV+vbtC39//yb77FetWoVOnTph2LBhSEpKQmJiIoYMsd2JFDktc3spvwysHQpU/iLdGvGOlFvvQ9QB1E/L3Nh0wtR8N/s9tjTX2ORsL189J4V+YBQw7Bm5qyEiB8bgbw/ZX0mzbSpUwENvACrnW+9DRNRGGPxt7eoF4Iu6bp1hM4DgQbKWQ0TE4ZxtpaYK+GEN8P2r0p2zOnUD7pwnd1VERAx+qxNC6tr5er40Zh8AwoYDSX8H1LzfKBHJj8FvTdVlwEdPAjlfSc89g4H7lgP9f8t5eIjIZjD4raXyKvDuo8D/DgIqNRA/A7jjz4DGQ+7KiIjMMPit4VoJ8M5oID8LcO0EPL4NCLHdizeIyLEx+FurvAj498NA0XHAzQ+Y9CkQ2F/uqoiImsTgbw3dJeDth4ArpwCPQGDyZ4B/L7mrIiK6KY7jt5TRCLw7Vgp9r1AgeTtDn6iDGTFiBGbNmmW195syZQpGjx5ttfezFFv8ljrxKVD4E6DxkkLfu+nbqxER2RK2+C1hNALfrZQe3/Y0Q5+oJYSQ
bj0qx9LKm63n5ubi2LFjGDlyJDw8PBAQEIAnnngCxcXFpv0+/PBDREVFwdXVFb6+vkhISEBFRQWWLFmCt99+G59++qnp/fbs2dMGv+BbY4vfEtnbgaKfAbUncNs0uashsi8114AVMt117vlLgNr91tuh8ZutOzs7IzY2FlOnTsVrr72GyspKzJ07F2PHjsXu3buRn5+P8ePH4+WXX8aYMWNQVlaG77//HkIIzJkzBydOnIBOp8PGjRsBoM1vqt4UBn9LCQF89zfpcdwfpeGbRNTh3HizdQD461//isGDB2PFihWm7TZs2IDQ0FDk5OSgvLwctbW1eOSRRxAWFgYAiIqKMm3r6uqK6upq0/vJhcHfUjlfAwU/As7uQPx0uashsj/OblLLW67PboWsrCx8++238PBoeGHmmTNncN999+Gee+5BVFQUEhMTcd999+HRRx9Fp0621UBk8LfEr1v7sVMBN3n+TCOyawpFs7tbbE15eTmSkpLwt7/9rcFrQUFBUKlU2LVrF3744Qfs3LkTa9aswYIFC5CRkYFu3brJUHHjeHK3JU6nAZeOAE6uQDxvpkLU0d14s/UhQ4bg559/Rnh4OCIiIswWd3fpy0yhUGD48OFYunQpjh49CrVajY8//rjR95MLg7+5ft3aH/oHwMNf3nqIqM3deLP16dOno6SkBOPHj8fBgwdx5swZfP3110hOTobBYEBGRgZWrFiBQ4cOIS8vD9u2bcPly5fRp08f0/v9+OOPyM7ORnFxMWpqamQ5LgZ/c537DvjfAcDJhbdOJHIQN95sXa/XY//+/TAYDLjvvvsQFRWFWbNmwdvbG0qlElqtFnv37sWoUaPQs2dPLFy4EK+++ipGjhwJAHjyySfRq1cvxMTEwN/fH/v375fluHiz9ebaOAo4vx+ImwaMbNi/R0SN483WrcOaN1vnyd3mEEK6UMugB4bPlLsaIqJWYfA3h0IB9EmSFiIiO8c+fiIiB8PgJyJyMAx+ImoXDjaOxOqs+ftj8BNRm3J2dgYAXLt2TeZK7JterwcAqFSqVr8XT+4SUZtSqVTw9vZGUVERAMDNzQ0KhULmquyL0WjE5cuX4ebmBien1sc2g5+I2lz9bJT14U8tp1Qq0bVrV6t8aTL4iajNKRQKBAUFoXPnzrJNU2Dv1Go1lErr9M4z+Imo3ahUKqv0UVPr8OQuEZGDYfATETkYBj8RkYNxuD7++osgdDqdzJUQEVlHfZ419yIvhwv+srIyAEBoaKjMlRARWVdZWRm8vLxuuZ3DzcdvNBpx6dIleHp6Nms8rE6nQ2hoKC5cuNCy+fttBOuXjz3XDrB+ubWkfiEEysrKEBwc3Kwhnw7X4lcqlejSpUuL99NqtXb5j6ce65ePPdcOsH65Nbf+5rT06/HkLhGRg2HwExE5GAb/LWg0GixevBgajUbuUizC+uVjz7UDrF9ubVm/w53cJSJydGzxExE5GAY/EZGDYfATETkYBj8RkYNh8N/E2rVrER4eDhcXF8TFxeHAgQNyl9So1NRUDB06FJ6enujcuTNGjx6N7Oxss22qqqowffp0+Pr6wsPDA7/97W9RWFgoU8VNe+mll6BQKDBr1izTOluv/eLFi3j88cfh6+sLV1dXREVF4dChQ6bXhRBYtGgRgoKC4OrqioSEBJw6dUrGiq8zGAx44YUX0K1bN7i6uqJHjx5Yvny52ZwvtlT/3r17kZSUhODgYCgUCnzyySdmrzen1pKSEkycOBFarRbe3t74wx/+gPLyctnrr6mpwdy5cxEVFQV3d3cEBwdj0qRJuHTpkvXrF9SozZs3C7VaLTZs2CB+/vln8eSTTwpvb29RWFgod2kNJCYmio0bN4pjx46JzMxMMWrUKNG1a1dRXl5u2mbatGkiNDRUpKWliUOHDonbbrtNDBs2TMaqGzpw4IAIDw8XAwYMEDNnzjStt+XaS0pKRFhYmJgyZYrIyMgQZ8+eFV9//bU4ffq0aZuXXnpJeHl5
iU8++URkZWWJhx56SHTr1k1UVlbKWLnkxRdfFL6+vuKLL74Q586dEx988IHw8PAQr7/+umkbW6p/+/btYsGCBWLbtm0CgPj444/NXm9Orffff78YOHCg+O9//yu+//57ERERIcaPHy97/VevXhUJCQliy5Yt4uTJkyI9PV3ExsaK6Ohos/ewRv0M/ibExsaK6dOnm54bDAYRHBwsUlNTZayqeYqKigQA8d133wkhpH9Qzs7O4oMPPjBtc+LECQFApKeny1WmmbKyMhEZGSl27dol7rzzTlPw23rtc+fOFbfffnuTrxuNRhEYGChWrlxpWnf16lWh0WjE+++/3x4l3tQDDzwgfv/735ute+SRR8TEiROFELZd/43B2Zxajx8/LgCIgwcPmrb56quvhEKhEBcvXmy32oVoWH9jDhw4IACI8+fPCyGsVz+7ehqh1+tx+PBhJCQkmNYplUokJCQgPT1dxsqap7S0FADg4+MDADh8+DBqamrMjqd3797o2rWrzRzP9OnT8cADD5jVCNh+7Z999hliYmLwu9/9Dp07d8bgwYPxf//3f6bXz507h4KCArP6vby8EBcXZxP1Dxs2DGlpacjJyQEAZGVlYd++fRg5ciQA26//15pTa3p6Ory9vRETE2PaJiEhAUqlEhkZGe1e862UlpZCoVDA29sbgPXqd7hJ2pqjuLgYBoMBAQEBZusDAgJw8uRJmapqHqPRiFmzZmH48OHo378/AKCgoABqtdr0j6deQEAACgoKZKjS3ObNm3HkyBEcPHiwwWu2XvvZs2fxz3/+EykpKXj++edx8OBBPPvss1Cr1Zg8ebKpxsb+LdlC/fPmzYNOp0Pv3r2hUqlgMBjw4osvYuLEiQBg8/X/WnNqLSgoQOfOnc1ed3Jygo+Pj80dT1VVFebOnYvx48ebJmmzVv0M/g5m+vTpOHbsGPbt2yd3Kc1y4cIFzJw5E7t27YKLi4vc5bSY0WhETEwMVqxYAQAYPHgwjh07hnXr1mHy5MkyV3drW7duxbvvvov33nsP/fr1Q2ZmJmbNmoXg4GC7qL+jqqmpwdixYyGEwD//+U+rvz+7ehrh5+cHlUrVYORIYWEhAgMDZarq1mbMmIEvvvgC3377rdnU04GBgdDr9bh69arZ9rZwPIcPH0ZRURGGDBkCJycnODk54bvvvsPf//53ODk5ISAgwGZrB4CgoCD07dvXbF2fPn2Ql5cHAKYabfXf0l/+8hfMmzcPjz32GKKiovDEE09g9uzZSE1NBWD79f9ac2oNDAxEUVGR2eu1tbUoKSmxmeOpD/3z589j165dZlMyW6t+Bn8j1Go1oqOjkZaWZlpnNBqRlpaG+Ph4GStrnBACM2bMwMcff4zdu3ejW7duZq9HR0fD2dnZ7Hiys7ORl5cn+/Hcc889+Omnn5CZmWlaYmJiMHHiRNNjW60dAIYPH95g6GxOTg7CwsIAAN26dUNgYKBZ/TqdDhkZGTZR/7Vr1xrcuEOlUsFoNAKw/fp/rTm1xsfH4+rVqzh8+LBpm927d8NoNCIuLq7da75RfeifOnUK33zzDXx9fc1et1r9FpyMdgibN28WGo1GbNq0SRw/flw89dRTwtvbWxQUFMhdWgNPP/208PLyEnv27BH5+fmm5dq1a6Ztpk2bJrp27Sp2794tDh06JOLj40V8fLyMVTft16N6hLDt2g8cOCCcnJzEiy++KE6dOiXeffdd4ebmJv7zn/+YtnnppZeEt7e3+PTTT8WPP/4oHn74YZsZzjl58mQREhJiGs65bds24efnJ5577jnTNrZUf1lZmTh69Kg4evSoACBWrVoljh49ahr10pxa77//fjF48GCRkZEh9u3bJyIjI9ttOOfN6tfr9eKhhx4SXbp0EZmZmWb/L1dXV1u1fgb/TaxZs0Z07dpVqNVqERsbK/773//KXVKjADS6bNy40bRNZWWl+NOf/iQ6deok3NzcxJgxY0R+fr58Rd/EjcFv67V//vnn
on///kKj0YjevXuLt956y+x1o9EoXnjhBREQECA0Go245557RHZ2tkzVmtPpdGLmzJmia9euwsXFRXTv3l0sWLDALGhsqf5vv/220X/rkydPbnatV65cEePHjxceHh5Cq9WK5ORkUVZWJnv9586da/L/5W+//daq9XNaZiIiB8M+fiIiB8PgJyJyMAx+IiIHw+AnInIwDH4iIgfD4CcicjAMfiIiB8PgJ2ql8PBwrF69Wu4yiJqNwU/UTJs2bWowPTQAHDx4EE899VSbfz6/YMhaOC0zUSv5+/vLXUKL6PV6qNVqucsgGbHFT3ZnxIgRePbZZ/Hcc8/Bx8cHgYGBWLJkSbP2vXr1KqZOnQp/f39otVrcfffdyMrKMr2elZWFu+66C56entBqtYiOjsahQ4ewZ88eJCcnm+6IpFAoTJ95Y0tcoVDgzTffxIMPPgg3Nzf06dMH6enpOH36NEaMGAF3d3cMGzYMZ86cMe1z5swZPPzwwwgICICHhweGDh2Kb775xuyYz58/j9mzZ5s+v95HH32Efv36QaPRIDw8HK+++qrZMYeHh2P58uWYNGkStFotnnrqKej1esyYMQNBQUFwcXFBWFiYaSpmcgBWnYGIqB3ceeedQqvViiVLloicnBzx9ttvC4VCIXbu3HnLfRMSEkRSUpI4ePCgyMnJEX/+85+Fr6+vuHLlihBCiH79+onHH39cnDhxQuTk5IitW7eKzMxMUV1dLVavXi20Wq1pxsT6ibHCwsLEa6+9ZvoMACIkJERs2bJFZGdni9GjR4vw8HBx9913ix07dojjx4+L2267Tdx///2mfTIzM8W6devETz/9JHJycsTChQuFi4uLadbJK1euiC5duohly5aZPl8IIQ4dOiSUSqVYtmyZyM7OFhs3bhSurq5mE/SFhYUJrVYrXnnlFXH69Glx+vRpsXLlShEaGir27t0rcnNzxffffy/ee++91v6nITvB4Ce7c+eddza4wfnQoUPF3Llzb7rf999/L7RaraiqqjJb36NHD/Hmm28KIYTw9PQUmzZtanT/jRs3Ci8vrwbrGwv+hQsXmp6np6cLAGL9+vWmde+//75wcXG5ab39+vUTa9asafJzhBBiwoQJ4t577zVb95e//EX07dvXbL/Ro0ebbfPMM8+Iu+++WxiNxpvWQB0Tu3rILg0YMMDseVBQUIM7E90oKysL5eXl8PX1hYeHh2k5d+6cqdslJSUFU6dORUJCAl566SWz7hhL66u/B2xUVJTZuqqqKuh0OgBAeXk55syZgz59+sDb2xseHh44ceKE6U5eTTlx4gSGDx9utm748OE4deoUDAaDad2vb84NAFOmTEFmZiZ69eqFZ599Fjt37rToOMk+8eQu2SVnZ2ez5wqFwnTXqKaUl5cjKCgIe/bsafBa/WidJUuWYMKECfjyyy/x1VdfYfHixdi8eTPGjBljcX31/fGNrauvec6cOdi1axdeeeUVREREwNXVFY8++ij0en2LPrcp7u7uZs+HDBmCc+fO4auvvsI333yDsWPHIiEhAR9++KFVPo9sG4OfHMaQIUNQUFAAJycnhIeHN7ldz5490bNnT8yePRvjx4/Hxo0bMWbMGKjVarNWtDXt378fU6ZMMX3BlJeXIzc312ybxj6/T58+2L9/f4P36tmzJ1Qq1U0/U6vVYty4cRg3bhweffRR3H///SgpKYGPj0/rD4hsGrt6yGEkJCQgPj4eo0ePxs6dO5Gbm4sffvgBCxYswKFDh1BZWYkZM2Zgz549OH/+PPbv34+DBw+iT58+AKTRMeXl5UhLS0NxcTGuXbtmtdoiIyOxbds2ZGZmIisrCxMmTGjwF0x4eDj27t2Lixcvori4GADw5z//GWlpaVi+fDlycnLw9ttv44033sCcOXNu+nmrVq3C+++/j5MnTyInJwcffPABAgMDG71OgToeBj85DIVCge3bt+M3v/kNkpOT0bNnTzz22GM4f/48AgICoFKpcOXK
FUyaNAk9e/bE2LFjMXLkSCxduhQAMGzYMEybNg3jxo2Dv78/Xn75ZavVtmrVKnTq1AnDhg1DUlISEhMTMWTIELNtli1bhtzcXPTo0cN07cCQIUOwdetWbN68Gf3798eiRYuwbNkyTJky5aaf5+npiZdffhkxMTEYOnQocnNzsX379gY3XqeOibdeJCJyMPx6JyJyMAx+6jDeffdds2Gav1769esnd3lENoNdPdRhlJWVobCwsNHXnJ2dERYW1s4VEdkmBj8RkYNhVw8RkYNh8BMRORgGPxGRg2HwExE5GAY/EZGDYfATETkYBj8RkYNh8BMROZj/B/Ihv/uQM40nAAAAAElFTkSuQmCC\n"
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "steps = np.arange(1, MAX_ESTIMATORS, 2)\n",
+ "rfc = RandomForestClassifier(warm_start=True)\n",
+ "\n",
+ "%time train_scores, test_scores = inc_fit(x, y, rfc=rfc, steps=steps)\n",
+ "print(f\"With {len(rfc.estimators_)}: {train_scores[-1]} | {test_scores[-1]}\")\n",
+ "plot_auc(steps, train_scores, test_scores)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "### Streaming random forest"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "def inc_partial_fit(x: np.ndarray, y:np.ndarray,\n",
+ " srfc=None,\n",
+ " steps=np.arange(1, 101, 2),\n",
+ " sample: int=0.1,\n",
+ " **kwargs) -> None:\n",
+ " \n",
+ " \"\"\"\n",
+ " Fit increasing number of estimators using .partial_fit on a subsample of the training data.\n",
+ " \n",
+ " StreamingRFC.n_estimators: Number of estimators that will be fit in each step. Set from first\n",
+ " difference in range (ie. range[1]-range[0])\n",
+    "    StreamingRFC.max_n_estimators: Limit on number of estimators that will be fit in model. Should be >\n",
+ " range[-1].\n",
+ " \n",
+ " :param srfc: StreamingRFC model to test. Default = None (use example with default RFC params).\n",
+ " If model is supplied, the .n_estimators param should match the constant range\n",
+ " step size.\n",
+ " :param steps: Range to iterate over. Sets total number of estimators that will be fit in model\n",
+ " after each iteration. Should be range with constant step size.\n",
+ " :param sample: Proportion of randomly sampled training data to use on each partial_fit call.\n",
+    "                   If sample = 1, all training data is used on each iteration,\n",
+ " so should behave as standard random forest. Default = 0.1 (10%)\n",
+ " \"\"\"\n",
+ " \n",
+ " x_train, x_test, y_train, y_test = train_test_split(x, y, \n",
+ " test_size=0.25,\n",
+ " random_state=1)\n",
+ " n_train = x_train.shape[0]\n",
+ " \n",
+ " if srfc is None:\n",
+ " srfc = StreamingRFC(n_estimators_per_chunk=np.diff(steps)[0],\n",
+ " max_n_estimators=np.max(steps),\n",
+ " **kwargs)\n",
+ " \n",
+ " train_scores = []\n",
+ " test_scores = []\n",
+ " for s in steps:\n",
+ " \n",
+ " use_idx = np.arange(0, n_train)[np.random.randint(low=0, \n",
+ " high=n_train, \n",
+ " size=int(n_train * sample))]\n",
+ " \n",
+ " # Fit model with these n ests\n",
+ " srfc.partial_fit(x_train[use_idx, :], y_train[use_idx],\n",
+ " classes=np.unique(y))\n",
+ " \n",
+ " tr_score, te_score = score(srfc,\n",
+ " train=(x_train, y_train),\n",
+ " test=(x_test, y_test),\n",
+ " pr=False)\n",
+ " train_scores.append(tr_score)\n",
+ " test_scores.append(te_score)\n",
+ " \n",
+ " return train_scores, test_scores"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "#### As normal random forest\n",
+ "1 estimator per full subset"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n",
+ "C:\\Users\\Gareth\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:425: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.\n",
+ " warn(\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "CPU times: total: 41.7 s\n",
+ "Wall time: 41.9 s\n",
+ "With 119: 1.0 | 0.6418562031518801\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": "",
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX4AAAFzCAYAAAA5RGIiAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjYuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8o6BhiAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA470lEQVR4nO3de1xUZf4H8M/MwHAfkPtFLt5DxRsIobVZkqhlaVuZWir9rJetlsq6qWlqVtJqma25a22m1VZqZZfdzDTUvESKFzBTwTuGchNluA/MPL8/joxNgMIwcGaYz/v1mtcyZ86Z+Y5rHx++5znPUQghBIiIyG4o5S6AiIjaFoOfiMjOMPiJiOwMg5+IyM4w+ImI7AyDn4jIzjD4iYjsDIOfiMjOOMhdQFszGAy4dOkSPDw8oFAo5C6HiKjFhBAoLS1FcHAwlMpbj+ftLvgvXbqE0NBQucsgIrK4ixcvomPHjrfcz+6C38PDA4D0B6TRaGSuhoio5bRaLUJDQ435dit2F/x17R2NRsPgJ6J2panta57cJSKyMwx+IiI7w+AnIrIzDH4iIjvD4CcisjMMfiIiO8PgJyKyM7IG/+7duzFq1CgEBwdDoVDgq6++uuUxu3btwoABA+Dk5ISuXbti/fr1rV4nEVF7Imvwl5eXo2/fvli9enWT9j937hzuu+8+3H333cjIyMDMmTMxZcoUfP/9961cKRFR+yHrlbsjRozAiBEjmrz/mjVr0KlTJ7zxxhsAgMjISOzduxdvvvkmEhMTW6vMdqVGb0CFTo/qWj10tQboag3QGwRq9AI1egNq9AbUGgT0BoFag4Dh+s8GIWAQgEH8/rmAwQDohYD43esGAUAIXP8fiOs//564vkHa54+v1t/PZFu9d2t4v+Zo4eFEFvVgv2AEebq02vvb1JINaWlpSEhIMNmWmJiImTNnNnpMdXU1qqurjc+1Wm1rlSeLqho9coorkHu1ErnXKlFYWo3ich2uVuigrapFSWUNtJU1KKmsQWlVDWr0jDgiaxcT3oHBXycvLw8BAQEm2wICAqDValFZWQkXl/p/UCkpKXjppZfaqsRWI4RAvrYav+SW4JfcEhzLLUFWXikulVSaPdpVOyjhpFLCQaWAg0oJ9fWfHVVKOCgVUCoUUCkVUCoVUCoABQCV8vq266+hbptCAYUCUF7/XwVuPMf1Y+vUrSeiMD43ff2P+xmfN/QlbrI0ieJmLxJZMR93p1Z9f5sKfnPMmzcPycnJxud1q9jZgqoaPfacKsLWY3n4MbsQRWXVDe7n4eyA0A6uCPZygb/GCT5uanRwVcPTxREaF0d4Xn94ODvAVa2Ci1oFtUrJ+xEQ2SmbCv7AwEDk5+ebbMvPz4dGo2lwtA8ATk5OcHJq3X89La2suhYfpp3He3vOobhcZ9yuVADd/D3QO8QTUSEa9ArxRGdfN3i7qRniRNRkNhX88fHx2LJli8m27du3Iz4+XqaKLKu6Vo+P0i7g7Z2nca2iBgAQ5OmM4b0DMaxnIPqFesFFrZK5SiKydbIGf1lZGU6fPm18fu7cOWRkZMDb2xthYWGYN28ecnNz8eGHHwIApk6dirfffhvPP/88nnzySezYsQObNm3Ct99+K9dXsJidWQVY+PUxXCyuBAB09nPDs/d0xag+wXBQ8To7IrIcWYP/4MGDuPvuu43P63rxkyZNwvr163H58mXk5OQYX+/UqRO+/fZbzJo1C2+99RY6duyI9957z6ancgoh8M9dZ/D6tiwIAfh7OOGvw7rj4ehQ48lTIiJLUoibTaJuh7RaLTw9PVFSUiL7Hbhq9AbM/iwTX2dcAgA8fnsYXhgZCVe1TXXgiEhmzc01JoxMhBB4YfMv+DrjEhyUCrz0YC9MiAuXuywisgMMfpm8lXoKnx36DUoF8M4T0RgaGXDrg4iILIBnDWXw5ZHfsPKHUwCAV0ZHMfSJqE0x+NtYdn4p5m3+BQDwlyFdMD4uTOaK
iMjeMPjbUHl1Lf7y8WFU1RhwZzdfzB7WQ+6SiMgOMfjb0MKvf8XpgjIEaJywcmw/KDldk4hkwOBvI4dzruKLw9LJ3FXjBrT6IkxERI1h8LcBIQSWb80CADw0oCNiO3nLXBER2TMGfxvYe7oIaWevQK1SYmZCN7nLISI7x+BvZUIILLs+2p9wexg6dnCVuSIisncM/la2M6sAv+SWwFWtwrS7u8pdDhERg7+1bUr/DQAwLjYMvjyhS0RWgMHfiq6W65B6UrpxzMPRHWWuhohIwuBvRf89egk1eoGeQRpEBsm7EigRUR0Gfyv64nAuAODPHO0TkRVh8LeS0wVlyLx4DSqlAg/0DZa7HCIiIwZ/K/nisHRSd0h3P/h58KQuEVkPBn8r2XGiAADwYP8QmSshIjLF4G8FxeU6ZOWXAgDu6OorczVERKYY/K3gwLkrAIDuAe7wdlPLXA0RkSkGfyv4+WwxAOD2zj4yV0JEVB+DvxX8fFYa8cd1YvATkfVh8FvYtYob/f24zlx+mYisD4PfwvafK4YQQFd/d67NQ0RWicFvYfuN/X2O9onIOjH4LYz9fSKydgx+CyqprMGJPC0A9veJyHox+C0oK68UQgAhXi7w93CWuxwiogYx+C0o+/psnu4B7jJXQkTUOAa/BZ0yBr+HzJUQETWOwW9B2fllAIBuDH4ismIMfgs6VcBWDxFZPwa/hRSX61BUpgMgXbxFRGStGPwWUndiN9TbBa5qB5mrISJqHIPfQowndv3Z3yci68bgtxCe2CUiW8HgtxDO4SciW8Hgt5BTBdKIn3P4icjaMfgtoKisGsXlOigUQBc/jviJyLox+C2grs0T5u0KF7VK5mqIiG6OwW8Bp+pO7HJGDxHZAAa/BZy/Ug4A6OLnJnMlRES3xuC3gNyrlQCAkA4uMldCRHRrDH4LuFRyPfi9GPxEZP0Y/BZQN+IPZvATkQ1g8LdQha4WVytqADD4icg2MPhb6NK1KgCAh5MDPF0cZa6GiOjWGPwtlHuNbR4isi2yB//q1asREREBZ2dnxMXF4cCBA43uW1NTgyVLlqBLly5wdnZG3759sXXr1jastr5L1zijh4hsi6zBv3HjRiQnJ2PRokU4fPgw+vbti8TERBQUFDS4/4IFC/DOO+9g1apVOH78OKZOnYoxY8bgyJEjbVz5DTdO7DrLVgMRUXPIGvwrVqzAU089haSkJPTs2RNr1qyBq6sr3n///Qb3/+ijj/DCCy9g5MiR6Ny5M5555hmMHDkSb7zxRhtXfsMltnqIyMbIFvw6nQ6HDh1CQkLCjWKUSiQkJCAtLa3BY6qrq+HsbDqydnFxwd69exv9nOrqami1WpOHJdX1+DmHn4hshWzBX1RUBL1ej4CAAJPtAQEByMvLa/CYxMRErFixAqdOnYLBYMD27duxefNmXL58udHPSUlJgaenp/ERGhpq0e/B4CciWyP7yd3meOutt9CtWzfcdtttUKvVmD59OpKSkqBUNv415s2bh5KSEuPj4sWLFqtHbxDIK5Gmc/LkLhHZCtmC39fXFyqVCvn5+Sbb8/PzERgY2OAxfn5++Oqrr1BeXo4LFy7g5MmTcHd3R+fOnRv9HCcnJ2g0GpOHpRSUVqHWIKBSKuDvwZO7RGQbZAt+tVqN6OhopKamGrcZDAakpqYiPj7+psc6OzsjJCQEtbW1+OKLL/Dggw+2drkNqjuxG6hxhkqpkKUGIqLmcpDzw5OTkzFp0iTExMQgNjYWK1euRHl5OZKSkgAAEydOREhICFJSUgAA+/fvR25uLvr164fc3FwsXrwYBoMBzz//vCz1/8ZVOYnIBska/GPHjkVhYSEWLlyIvLw89OvXD1u3bjWe8M3JyTHp31dVVWHBggU4e/Ys3N3dMXLkSHz00Ufw8vKSpf665Rp4YpeIbIlCCCHkLqItabVaeHp6oqSkpMX9/he/OoaPfr6A6Xd3xezEHhaqkIioeZqbazY1q8facJ0eIrJFDP4W
uHHVLmf0EJHtYPC3QJ5W6vEHeXLET0S2g8Fvphq9Adeu34DF110tczVERE3H4DfT1QodAEChALxcGfxEZDsY/GYqLpeCv4OrmhdvEZFNYfCb6UqZFPw+bhztE5FtYfCb6cr1Eb83g5+IbAyD30xXyqoBAL7uTjJXQkTUPAx+MxVzxE9ENorBb6a6Vo8Pp3ISkY1h8JuprtXDk7tEZGsY/Ga60ephj5+IbAuD30zG6Zxs9RCRjWHwm8nY42erh4hsDIPfDDV6A0oqpXV6fDidk4hsDIPfDFevj/aVCsDLxVHmaoiImofBb4Yrv1unR8l1eojIxjD4zcATu0Rkyxj8ZrhSLs3h51W7RGSLGPxmKDZetcsTu0Rkexj8ZuCSzERkyxj8Zrgxh58jfiKyPQx+M9St0+PNk7tEZIMY/GYo5lW7RGTDGPxmYPATkS1j8JuhqG5JZrZ6iMgGMfibSVdrgLaqFgBP7hKRbWLwN9PVCqnNo1Iq4Ml1eojIBjH4m6luDj/X6SEiW8Xgbyae2CUiW8fgb6ayaqm/7+7sIHMlRETmYfA3U2WNFPyuapXMlRARmYfB30wVOj0AwMWRwU9EtonB30yVdcHPET8R2SgGfzPVBT9bPURkqxj8zVRRU9fq4cldIrJNDP5mutHq4R8dEdkmplcz3Wj1cMRPRLaJwd9MN1o97PETkW1i8DcTZ/UQka1j8DcTL+AiIlvH4G8mXsBFRLaOwd9MbPUQka1j8DdTZQ0v4CIi28bgb6a6Vo8zWz1EZKMY/M1UxXn8RGTjGPzNIIQwzuNnq4eIbJXswb969WpERETA2dkZcXFxOHDgwE33X7lyJXr06AEXFxeEhoZi1qxZqKqqapNadXoD9AYBgK0eIrJdsgb/xo0bkZycjEWLFuHw4cPo27cvEhMTUVBQ0OD+n3zyCebOnYtFixbhxIkTWLt2LTZu3IgXXnihTeqt0hmMP3PET0S2StbgX7FiBZ566ikkJSWhZ8+eWLNmDVxdXfH+++83uP9PP/2EwYMHY/z48YiIiMCwYcMwbty4W/6WYCkV1y/eclQp4KiS/ZclIiKzyJZeOp0Ohw4dQkJCwo1ilEokJCQgLS2twWMGDRqEQ4cOGYP+7Nmz2LJlC0aOHNno51RXV0Or1Zo8zMUZPUTUHsg2NaWoqAh6vR4BAQEm2wMCAnDy5MkGjxk/fjyKiopwxx13QAiB2tpaTJ069aatnpSUFLz00ksWqZk3YSGi9sCm+hW7du3C0qVL8c9//hOHDx/G5s2b8e233+Lll19u9Jh58+ahpKTE+Lh48aLZn3/j4i1O5SQi2yVbgvn6+kKlUiE/P99ke35+PgIDAxs85sUXX8QTTzyBKVOmAACioqJQXl6Op59+GvPnz4dSWf/fMScnJzg5OVmkZrZ6iKg9kG3Er1arER0djdTUVOM2g8GA1NRUxMfHN3hMRUVFvXBXqaQQFkK0XrHXsdVDRO2BrD2L5ORkTJo0CTExMYiNjcXKlStRXl6OpKQkAMDEiRMREhKClJQUAMCoUaOwYsUK9O/fH3FxcTh9+jRefPFFjBo1yvgPQGviksxE1B7IGvxjx45FYWEhFi5ciLy8PPTr1w9bt241nvDNyckxGeEvWLAACoUCCxYsQG5uLvz8/DBq1Ci8+uqrbVIvWz1E1B4oRFv0SKyIVquFp6cnSkpKoNFomnXse3vO4pVvT+DBfsF467H+rVQhEVHzNDfXbGpWj9zY4yei9oDB3wx1C7Sx1UNEtozB3wwc8RNRe8Dgb4ZKrsVPRO2AWcFfUlKC4uLietuLi4tbtBaOtWOrh4jaA7OC/7HHHsOGDRvqbd+0aRMee+yxFhdlrdjqIaL2wKzg379/P+6+++5624cMGYL9+/e3uChrxQu4iKg9MCv4q6urUVtbW297TU0NKisrW1yUteIFXETUHpgV/LGxsXj33XfrbV+zZg2io6NbXJS1YquHiNoDs6anvPLKK0hISEBmZiaGDh0K
AEhNTUV6ejq2bdtm0QKtSSVvtE5E7YBZI/7BgwcjLS0NoaGh2LRpE/773/+ia9euOHr0KO68805L12g12OohovbA7Anp/fr1w8cff2zJWqwe5/ETUXtgVoLl5OTc9PWwsDCzirFmQgi2eoioXTAr+CMiIqBQKBp9Xa/Xm12QtdLpDdAbpIVM2eohIltmVvAfOXLE5HlNTQ2OHDmCFStWtNna+G2trs0DcMRPRLbNrODv27dvvW0xMTEIDg7G8uXL8dBDD7W4MGtT1+ZxVCngqOISR0RkuyyaYD169EB6erol39JqcEYPEbUXZo34/7gQmxACly9fxuLFi9GtWzeLFGZtePEWEbUXZgW/l5dXvZO7QgiEhoY2uHhbe3BjRg+nchKRbTMrxXbu3GnyXKlUws/PD127doWDQ/sMRrZ6iKi9MCul77rrLgDA8ePHkZOTA51Oh6tXryI7OxsA8MADD1iuQitRqePKnETUPpgV/GfPnsVDDz2Eo0ePQqFQQAhpfntd+6c9zuPnxVtE1F6YNatnxowZiIiIQEFBAVxdXXHs2DHs3r0bMTEx2LVrl4VLtA5s9RBRe2HWiD8tLQ07duyAr68vlEolVCoV7rjjDqSkpOC5556rd4FXe8BZPUTUXpg14tfr9fDw8AAA+Pr64tKlSwCA8PBwZGVlWa46K8LgJ6L2wqwRf+/evZGZmYlOnTohLi4Oy5Ytg1qtxrvvvovOnTtbukarwButE1F7YVbwL1iwAOXl5QCAJUuW4P7778edd94JHx8fbNy40aIFWguO+ImovTAr+BMTE40/d+3aFSdPnkRxcTE6dOhw01U7bRnX4iei9sJiKebt7W2pt7JKbPUQUXvBZSabiBdwEVF7wb5FE73+SF9oK2vh6eoodylERC3C4G8iL1c1vFzVcpdBRNRibPUQEdkZBj8RkZ1h8BMR2RkGPxGRnWHwExHZGQY/EZGdYfATEdkZBj8RkZ1h8BMR2RkGPxGRnWHwExHZGQY/EZGdYfATEdkZBj8RkZ1h8BMR2RkGPxGRnbGK4F+9ejUiIiLg7OyMuLg4HDhwoNF9hwwZAoVCUe9x3333tWHFRES2S/bg37hxI5KTk7Fo0SIcPnwYffv2RWJiIgoKChrcf/Pmzbh8+bLxcezYMahUKjzyyCNtXDkRkW2SPfhXrFiBp556CklJSejZsyfWrFkDV1dXvP/++w3u7+3tjcDAQONj+/btcHV1ZfATETWRrMGv0+lw6NAhJCQkGLcplUokJCQgLS2tSe+xdu1aPPbYY3Bzc2vw9erqami1WpMHEZE9kzX4i4qKoNfrERAQYLI9ICAAeXl5tzz+wIEDOHbsGKZMmdLoPikpKfD09DQ+QkNDW1w3EZEtk73V0xJr165FVFQUYmNjG91n3rx5KCkpMT4uXrzYhhUSEVkfBzk/3NfXFyqVCvn5+Sbb8/PzERgYeNNjy8vLsWHDBixZsuSm+zk5OcHJyanFtRIRtReyjvjVajWio6ORmppq3GYwGJCamor4+PibHvvZZ5+huroajz/+eGuXSUTUrsg64geA5ORkTJo0CTExMYiNjcXKlStRXl6OpKQkAMDEiRMREhKClJQUk+PWrl2L0aNHw8fHR46yiYhsluzBP3bsWBQWFmLhwoXIy8tDv379sHXrVuMJ35ycHCiVpr+YZGVlYe/evdi2bZscJRMR2TSFEELIXURb0mq18PT0RElJCTQajdzlEBG1WHNzzaZn9RARUfMx+ImI7AyDn4jIzjD4iYjsDIOfiMjOMPiJiOwMg5+IyM4w+ImI7AyDn4jIzjD4iYjsDIOfiMjOMPiJiOwMg5+IyM4w+ImI7AyDn4jIzjD4iYjsDIOfiMjOMPiJiOwMg5+IyM4w+ImI7AyDn4jIzjD4iYjsDIOfiMjOMPiJiOwMg5+IyM44yF0AEZFd0pUDJb8BThrA1QdwULfZRzP4iYjMUV0KGGoBpQNQUwWUFwBlBUC1FqjSAoYaQAig
pgK4dhHQ5gL6GkDopcAvygaE4cb7dYgAQuOA0Figx0hAE9xqpTP4iYhuRlcB5B0FLmdKj4LjwNXzQOXVlr+32gOoKZf+Abh6Xnoc3Qh06MTgJyJqVVVaIP8Y8NtB4LcD0ohcGICaSuDKadOReYMUUrvGzQ9w9gScNYDqeuvGwQnw7Ah4hgKOLoBCKe0b1BfwCAQMBqCyWPpH5eIB4OJ+oGNMq35dBj8RtR9lhUBJDqB2vx7AnoCDsxTcJReBK2eAwpNAwQmgNE9qy5TlSyPtm3EPAIL6AcH9gIDegE8XwCtceu+6do/KzDhVKgE3X6DrUOnRBhj8RCQ/XQVQfBYoPgNoLwNledLI2C8S8OshBa+rtxTWeb8ABb8ChdnS/rU6AEIK8PLC+u+tUku9dkPNzWvQhADB/aUeu293QKGSwtzvtpu3XcwNfBnZXsVEZBv0tdIJzYorQFWJ1DK5eEDql1eXAbWVQG01oNdJD4tQSO2T2irpM4Xhxnur1FLv3K8H4B8JeIVdn1HjDfj3lP7XTjD4idqz0jypZ1ySK42IayoBleP1UbBBmmHi5g8E9AK8O0tti5pKoOqaFNjFZ4G8Y1LP268HEBIttTlcfaU2ispRGpmX5kn7FBwHLh2+fsxF6f2aytlLem/PjoB7oBTYhSel2S8VxQCE1FLxi5Tq9esB+HYDHF2l4128pNG52k16bjAAujKpnSOENGpXqiz752ujGPxE7UGtDii9LI2w838Fcg9Jo+viM5b7jN8OAEc+at4xKqcbJzw9AqU2Ski0dHLT0UU68alSS2Ht0qHx9zHopRG82k06pimUSukkq7OmeTXbAQY/ka2o1QGFJ6Qed2EWUHQKuHZBGslXXGnkIAUQ2FvqWbv5S8Gpr5bmkytUgEIhjczzjgHaS9JFRA4u0ujZxVsaJQdGSaPw/F+l0bz28o32TV3f3NVH6pH7dAGCB0gzVny6Ah5BUgC3lFJlV62Y1sbgJ5KbrlyaTlhbKV0AlP+rFOzaXCmMK4ulnnjlVak10xiVWgpqn67SqDokWrogyMXLMnVGPVx/mxBSy4gtFJvC4CdqDQaDNBq/cgbQ/iYFeN0JzSqtFOblhVJfvDkXAjl7SSNw/55Sf9u7s9RCcfOXRt2WGF03h0Ih/eZANoXBT2QJVVrg/B4gJw3I+VkatddUNP14hfJGi8U/8vqsk3CpVeLmK80+cfGSnisUrfUtyE4w+ImaQgjgWo7UTw/qK51grC4DftkE/PoVcGFf/RksKrXUdvEMBTxDpPB2dJH67K6+0gjdM0TqnztpGOjUZhj8RHWEkKY+XsqQFuCqLpGuBC3LA/KPS4twAYCjGxAWJ13eX629cbx3F6DTnUDYIOlCIO/ONnlxD7V//FtJ9k0IaZZM1hYgcwNw9Vzj+yodpJF5ZTFwZoe0zbsLED0ZuO0+aUYLkQ1g8JN90VUAp3+QpiUWZgOXjgCll268rnYHOg+R2jBOHtIcdPcAafQe1Eeal56XCVz4SbqAqPM9bX9ClaiFGPzUvgghXT16brd01abj9as4ywukq1BP/SAtg/t7jq5A57uBng8AkaNuXPnZmOD+0oPIRjH4yfbUVAFnUqV2i5NGuky/thI4v0+aWVN6+ebHe4YB3RKk4/xuk+a6Ozq3Te1EVoDBT7ajvAjYvRw48h9pNN8YBxcgYrA09bGmQrrAyD1AenT6k3RhE2fQkB1j8JP1KSuULnqq1UltmZJcabGuQx8AulJpH02IdELVUCv16hUKICxeCvzQ2zmCJ7oJBj9Zh98OAftWAr+l37xVE9gHSFgs9eR5UpXILAx+antVWuDgWmm5AmcvoCgLOPHf3+2gkNoyjs5S28YzRFo7PeJOoOdoBj5RC8ke/KtXr8by5cuRl5eHvn37YtWqVYiNjW10/2vXrmH+/PnYvHkziouLER4ejpUrV2LkyJFtWDWZpTQfOPY5sOeN+qtJKpRA33FA/yek1SSdPOSpkcgOyBr8Gzdu
RHJyMtasWYO4uDisXLkSiYmJyMrKgr+/f739dTod7r33Xvj7++Pzzz9HSEgILly4AC8vr7Yvnm5NVyHNdz/9A3B2l7SkcB2fbtL0yepSKfSjJ0vr0xBRq1MIIYRcHx4XF4eBAwfi7bffBgAYDAaEhobi2Wefxdy5c+vtv2bNGixfvhwnT56Eo6OjWZ+p1Wrh6emJkpISaDS8QUOr0FUAP/8T2LvyxsnYOoFRwMApQL/HuZwBkYU0N9dk+y9Pp9Ph0KFDmDdvnnGbUqlEQkIC0tLSGjzmm2++QXx8PKZNm4avv/4afn5+GD9+PObMmQOVquGlYaurq1FdXW18rtVqG9yPLKDoNHD8SyD9/RtXw2pCgK5DgS73ABF/Atx85K2RiOQL/qKiIuj1egQEBJhsDwgIwMmTJxs85uzZs9ixYwcmTJiALVu24PTp0/jLX/6CmpoaLFq0qMFjUlJS8NJLL1m8frquphI4thlI/7e0/EEdz1Bg6CKg9595MpbIytjU79oGgwH+/v549913oVKpEB0djdzcXCxfvrzR4J83bx6Sk5ONz7VaLUJDQ9uq5PZLexlIfw84+L60aBkgLWLW6S6g1xgg6hHOpSeyUrIFv6+vL1QqFfLz80225+fnIzAwsMFjgoKC4OjoaNLWiYyMRF5eHnQ6HdRqdb1jnJyc4OTUxJsz062VFwE7Xpaunq1bf94zFBj4f9KMHDdfeesjoluS7XdwtVqN6OhopKamGrcZDAakpqYiPj6+wWMGDx6M06dPw2AwGLdlZ2cjKCiowdAnCzIYgAP/BlYNAA6tl0I/bBDw6EfAcxnAHbMY+kQ2QtZWT3JyMiZNmoSYmBjExsZi5cqVKC8vR1JSEgBg4sSJCAkJQUpKCgDgmWeewdtvv40ZM2bg2WefxalTp7B06VI899xzcn6N9k97CfhyKnDuR+l5YBQwYhkQPkjeusimCCFQW1sLvf4mN4ynRv2x29ESsgb/2LFjUVhYiIULFyIvLw/9+vXD1q1bjSd8c3JyoPzdicHQ0FB8//33mDVrFvr06YOQkBDMmDEDc+bMkesrtG9CAEc3AlvnSjcEd3QF7l0CxDwJKHmDbWo6nU6Hy5cvo6KiGfchJhMKhQIdO3aEu7t7y99Lznn8cuA8/ibKPQR8P1+6eTgABPUD/vwe4NtN1rLI9hgMBpw6dQoqlQp+fn5Qq9VQcHXUZhFCoLCwEBUVFejWrVu9kb/NzOMnK6KvBcoLgSunpdsQZn4K5B2VXnN0Bf70NyB+OuDA8yjUfDqdznhxpqurq9zl2Cw/Pz+cP38eNTU1LW75MPjtUXUpcHQTcPhD4MqZ+lfXAoBKLU3LHLoQ8OzY9jVSu6Pk9RwtYsnfkhj89kRfKy2lsHs5UP2HK5gVKqBDuHTz8K5DgT5jAVdveeokolbF4G+PtJeAMzsBRxfApYM09bLyKpC2GricIe3j0xWI+T+g2zBpH2dPrp1DZCf4X3p7IgRw+APg+wUNt28AKeCHvQr0m8ClFIjaUEREBGbOnImZM2fKXQqD3yac3SWtdOnsKd2gpPIqUHwWqLombVO7A3odUFYAFJ+RjvHvKd3kpPIqoHKU9vPtBtw1F/AIaPyziMhoyJAh6NevH1auXNni90pPT4ebm1vLi7IABr+1u5YDbJoIVJU0bX8HZ+CeF4Hbn+Fce6JWJoSAXq+Hg8Oto9TPz68NKmoa/q5vzfQ1wOdPSqEf3B8Y/ndpaYShi4BHPwQmfwuM2wCMeRd45APg8S+AGZnAoOkMfbJaQghU6GpleTTnsqXJkyfjxx9/xFtvvQWFQgGFQoH169dDoVDgu+++Q3R0NJycnLB3716cOXMGDz74IAICAuDu7o6BAwfihx9+MHm/iIgIk98cFAoF3nvvPYwZMwaurq7o1q0bvvnmG0v9Md8UR/zWpPKq1LZRqID8X4Cf10g3H3f2lIK9Q7jcFRK1WGWNHj0Xfi/LZx9fkghXddNi76233kJ2djZ6
9+6NJUuWAAB+/fVXAMDcuXPx+uuvo3PnzujQoQMuXryIkSNH4tVXX4WTkxM+/PBDjBo1CllZWQgLC2v0M1566SUsW7YMy5cvx6pVqzBhwgRcuHAB3t6tO6OOwW8Nyq8An08Gzu2WnqvUUs++zoOrGfpEbczT0xNqtRqurq7GFYPr7hWyZMkS3HvvvcZ9vb290bdvX+Pzl19+GV9++SW++eYbTJ8+vdHPmDx5MsaNGwcAWLp0Kf7xj3/gwIEDGD58eGt8JSMGv9yKTgMfPwxcPXdjm14HqD2AiMFA38eAyFHy1UdkYS6OKhxfkijbZ1tCTEyMyfOysjIsXrwY3377LS5fvoza2lpUVlYiJyfnpu/Tp08f489ubm7QaDQoKCiwSI03w+CXU2k+sPZe6UYmXmFSv949EKgukda4V5l3X2Eia6ZQKJrcbrFWf5ydM3v2bGzfvh2vv/46unbtChcXFzz88MPQ6XSNvIPkj/cOVygUJsvOtxbb/tO3dXvekELfvxcw8SvA3V/azvvSElkFtVrdpGWk9+3bh8mTJ2PMmDEApN8Azp8/38rVmY+zeuRS8htwaJ308/CUG6FPRFYjIiIC+/fvx/nz51FUVNToaLxbt27YvHkzMjIykJmZifHjx7fJyN1cDH657H5d6uVH3Al0vkvuaoioAbNnz4ZKpULPnj3h5+fXaM9+xYoV6NChAwYNGoRRo0YhMTERAwYMaONqm47r8cvh6nlgVbS0hk7SViC84VtNErUHVVVVOHfuHDp16gRnZ2e5y7FZN/tzbG6uccTf1oSQbnBiqAW63MPQJ6I2x+Bva5kbgJP/A5SO0m0MiYjaGIO/LZX8Bnz3vPTzkLnSTcuJiNoYg7+tGAzAV3+RboDScSAweKbcFRGRnWLwt5ZaHVCYLfX0AWDfm8C5HwEHF2D0Gt70hIhkw/RpDUIAn44FzuwAbrtfuo3hjlel10YuB3y7ylsfEdk1Bn9rOLpJCn1AOpF78n/Sz1GPAv0fl68uIiKw1WN5lVeB71+Qfo5OunEC17sLcP8KQKGQrzYiInDEb3k/vARUFAG+PYARy6Sgz/4eCI0DnDzkro6IiCN+i/rp7Rvr79z/JuCgllbYjLwfcLee264RUdMMGTLEojdHnzx5MkaPHm2x9zMXR/yWIASwKwX48e/S8zv/Kq2lT0RkhTjit4T9a26E/j0vSg8ismkN3XP3/PnzOHbsGEaMGAF3d3cEBATgiSeeQFFRkfG4zz//HFFRUXBxcYGPjw8SEhJQXl6OxYsX44MPPsDXX39tfL9du3bJ8t044m+paxeB1OtLL9y7BBg8Q956iKydEEBNhTyf7eja5AkWDd1z19HREbGxsZgyZQrefPNNVFZWYs6cOXj00UexY8cOXL58GePGjcOyZcswZswYlJaWYs+ePRBCYPbs2Thx4gS0Wi3WrZNawq19b93GMPibq7oU+G4u4N0JiJ8OfDdH+kscFg/EPyt3dUTWr6YCWBosz2e/cAlQu916PzR8z91XXnkF/fv3x9KlS437vf/++wgNDUV2djbKyspQW1uLhx56COHh0n2yo6JuLM3i4uKC6upq4/vJhcHfXIfWAxn/kX5OXwuUXgKUDtLJXCU7Z0TtWWZmJnbu3Al3d/d6r505cwbDhg3D0KFDERUVhcTERAwbNgwPP/wwOnToIEO1jWPwN4cQwJGPpZ9VTlLoA8CgZwH/SPnqIrIljq7SyFuuz26BsrIyjBo1Cn//+9/rvRYUFASVSoXt27fjp59+wrZt27Bq1SrMnz8f+/fvR6dOnVr02ZbE4G+OS0eAwhOAgzMw/SBw4B2g4irwp+flrozIdigUTW63yO2P99wdMGAAvvjiC0RERMDBoeH4VCgUGDx4MAYPHoyFCxciPDwcX375JZKTk5t8D9/Wxt5Ec2RcH+1HjgK8QoFhrwCjVwPqlo0iiMg6/fGeu9OmTUNxcTHGjRuH9PR0nDlzBt9//z2SkpKg1+uxf/9+
LF26FAcPHkROTg42b96MwsJCREZGGt/v6NGjyMrKQlFREWpqamT5Xgz+pqqpAn75TPq533h5ayGiNvHHe+7qdDrs27cPer0ew4YNQ1RUFGbOnAkvLy8olUpoNBrs3r0bI0eORPfu3bFgwQK88cYbGDFiBADgqaeeQo8ePRATEwM/Pz/s27dPlu/Fe+421bEvgM+fBDQdgZlHAaWq9Yokakd4z13L4D135ZDxifS//cYx9InIpvHkblP1GQvUVrPNQ0Q2j8HfVH0elR5ERDaOrR4iIjvD4CcisjMMfiJqE3Y2gdDiLPnnx+Anolbl6OgIAKiokGlFznZCp9MBAFSqls8q5MldImpVKpUKXl5eKCgoAAC4urpCwXtPN4vBYEBhYSFcXV0bXSqiORj8RNTq6pYhrgt/aj6lUomwsDCL/KPJ4CeiVqdQKBAUFAR/f3/Z1qexdWq1GkoLLf3O4CeiNqNSqSzSo6aW4cldIiI7w+AnIrIzDH4iIjtjdz3+uosgtFqtzJUQEVlGXZ419SIvuwv+0tJSAEBoaKjMlRARWVZpaSk8PT1vuZ/d3YjFYDDg0qVL8PDwaNJ8WK1Wi9DQUFy8eLF5N26xEqxfPrZcO8D65dac+oUQKC0tRXBwcJOmfNrdiF+pVKJjx47NPk6j0djkX546rF8+tlw7wPrl1tT6mzLSr8OTu0REdobBT0RkZxj8t+Dk5IRFixbByclJ7lLMwvrlY8u1A6xfbq1Zv92d3CUisncc8RMR2RkGPxGRnWHwExHZGQY/EZGdYfDfxOrVqxEREQFnZ2fExcXhwIEDcpfUoJSUFAwcOBAeHh7w9/fH6NGjkZWVZbJPVVUVpk2bBh8fH7i7u+PPf/4z8vPzZaq4ca+99hoUCgVmzpxp3Gbttefm5uLxxx+Hj48PXFxcEBUVhYMHDxpfF0Jg4cKFCAoKgouLCxISEnDq1CkZK75Br9fjxRdfRKdOneDi4oIuXbrg5ZdfNlnzxZrq3717N0aNGoXg4GAoFAp89dVXJq83pdbi4mJMmDABGo0GXl5e+L//+z+UlZXJXn9NTQ3mzJmDqKgouLm5ITg4GBMnTsSlS5csX7+gBm3YsEGo1Wrx/vvvi19//VU89dRTwsvLS+Tn58tdWj2JiYli3bp14tixYyIjI0OMHDlShIWFibKyMuM+U6dOFaGhoSI1NVUcPHhQ3H777WLQoEEyVl3fgQMHREREhOjTp4+YMWOGcbs1115cXCzCw8PF5MmTxf79+8XZs2fF999/L06fPm3c57XXXhOenp7iq6++EpmZmeKBBx4QnTp1EpWVlTJWLnn11VeFj4+P+N///ifOnTsnPvvsM+Hu7i7eeust4z7WVP+WLVvE/PnzxebNmwUA8eWXX5q83pRahw8fLvr27St+/vlnsWfPHtG1a1cxbtw42eu/du2aSEhIEBs3bhQnT54UaWlpIjY2VkRHR5u8hyXqZ/A3IjY2VkybNs34XK/Xi+DgYJGSkiJjVU1TUFAgAIgff/xRCCH9hXJ0dBSfffaZcZ8TJ04IACItLU2uMk2UlpaKbt26ie3bt4u77rrLGPzWXvucOXPEHXfc0ejrBoNBBAYGiuXLlxu3Xbt2TTg5OYlPP/20LUq8qfvuu088+eSTJtseeughMWHCBCGEddf/x+BsSq3Hjx8XAER6erpxn++++04oFAqRm5vbZrULUb/+hhw4cEAAEBcuXBBCWK5+tnoaoNPpcOjQISQkJBi3KZVKJCQkIC0tTcbKmqakpAQA4O3tDQA4dOgQampqTL7PbbfdhrCwMKv5PtOmTcN9991nUiNg/bV/8803iImJwSOPPAJ/f3/0798f//73v42vnzt3Dnl5eSb1e3p6Ii4uzirqHzRoEFJTU5GdnQ0AyMzMxN69ezFixAgA1l//7zWl1rS0NHh5eSEmJsa4T0JCApRKJfbv39/mNd9KSUkJFAoFvLy8AFiufrtbpK0pioqKoNfrERAQYLI9ICAA
J0+elKmqpjEYDJg5cyYGDx6M3r17AwDy8vKgVquNf3nqBAQEIC8vT4YqTW3YsAGHDx9Genp6vdesvfazZ8/iX//6F5KTk/HCCy8gPT0dzz33HNRqNSZNmmSssaG/S9ZQ/9y5c6HVanHbbbdBpVJBr9fj1VdfxYQJEwDA6uv/vabUmpeXB39/f5PXHRwc4O3tbXXfp6qqCnPmzMG4ceOMi7RZqn4Gfzszbdo0HDt2DHv37pW7lCa5ePEiZsyYge3bt8PZ2VnucprNYDAgJiYGS5cuBQD0798fx44dw5o1azBp0iSZq7u1TZs24eOPP8Ynn3yCXr16ISMjAzNnzkRwcLBN1N9e1dTU4NFHH4UQAv/6178s/v5s9TTA19cXKpWq3syR/Px8BAYGylTVrU2fPh3/+9//sHPnTpOlpwMDA6HT6XDt2jWT/a3h+xw6dAgFBQUYMGAAHBwc4ODggB9//BH/+Mc/4ODggICAAKutHQCCgoLQs2dPk22RkZHIyckBAGON1vp36W9/+xvmzp2Lxx57DFFRUXjiiScwa9YspKSkALD++n+vKbUGBgaioKDA5PXa2loUFxdbzfepC/0LFy5g+/btJksyW6p+Bn8D1Go1oqOjkZqaatxmMBiQmpqK+Ph4GStrmBAC06dPx5dffokdO3agU6dOJq9HR0fD0dHR5PtkZWUhJydH9u8zdOhQ/PLLL8jIyDA+YmJiMGHCBOPP1lo7AAwePLje1Nns7GyEh4cDADp16oTAwECT+rVaLfbv328V9VdUVNS7cYdKpYLBYABg/fX/XlNqjY+Px7Vr13Do0CHjPjt27IDBYEBcXFyb1/xHdaF/6tQp/PDDD/Dx8TF53WL1m3Ey2i5s2LBBODk5ifXr14vjx4+Lp59+Wnh5eYm8vDy5S6vnmWeeEZ6enmLXrl3i8uXLxkdFRYVxn6lTp4qwsDCxY8cOcfDgQREfHy/i4+NlrLpxv5/VI4R1137gwAHh4OAgXn31VXHq1Cnx8ccfC1dXV/Gf//zHuM9rr70mvLy8xNdffy2OHj0qHnzwQauZzjlp0iQREhJinM65efNm4evrK55//nnjPtZUf2lpqThy5Ig4cuSIACBWrFghjhw5Ypz10pRahw8fLvr37y/2798v9u7dK7p169Zm0zlvVr9OpxMPPPCA6Nixo8jIyDD5b7m6utqi9TP4b2LVqlUiLCxMqNVqERsbK37++We5S2oQgAYf69atM+5TWVkp/vKXv4gOHToIV1dXMWbMGHH58mX5ir6JPwa/tdf+3//+V/Tu3Vs4OTmJ2267Tbz77rsmrxsMBvHiiy+KgIAA4eTkJIYOHSqysrJkqtaUVqsVM2bMEGFhYcLZ2Vl07txZzJ8/3yRorKn+nTt3Nvh3fdKkSU2u9cqVK2LcuHHC3d1daDQakZSUJEpLS2Wv/9y5c43+t7xz506L1s9lmYmI7Ax7/EREdobBT0RkZxj8RER2hsFPRGRnGPxERHaGwU9EZGcY/EREdobBT9RCERERWLlypdxlEDUZg5+oidavX19veWgASE9Px9NPP93qn89/YMhSuCwzUQv5+fnJXUKz6HQ6qNVqucsgGXHETzZnyJAheO655/D888/D29sbgYGBWLx4cZOOvXbtGqZMmQI/Pz9oNBrcc889yMzMNL6emZmJu+++Gx4eHtBoNIiOjsbBgwexa9cuJCUlGe+IpFAojJ/5x5G4QqHAO++8g/vvvx+urq6IjIxEWloaTp8+jSFDhsDNzQ2DBg3CmTNnjMecOXMGDz74IAICAuDu7o6BAwfihx9+MPnOFy5cwKxZs4yfX+eLL75Ar1694OTkhIiICLzxxhsm3zkiIgIvv/wyJk6cCI1Gg6effho6nQ7Tp09HUFAQnJ2dER4eblyKmeyARVcgImoDd911l9BoNGLx4sUiOztbfPDBB0KhUIht27bd8tiEhAQxatQokZ6eLrKzs8Vf//pX4ePjI65cuSKEEKJXr17i8ccf
FydOnBDZ2dli06ZNIiMjQ1RXV4uVK1cKjUZjXDGxbmGs8PBw8eabbxo/A4AICQkRGzduFFlZWWL06NEiIiJC3HPPPWLr1q3i+PHj4vbbbxfDhw83HpORkSHWrFkjfvnlF5GdnS0WLFggnJ2djatOXrlyRXTs2FEsWbLE+PlCCHHw4EGhVCrFkiVLRFZWlli3bp1wcXExWaAvPDxcaDQa8frrr4vTp0+L06dPi+XLl4vQ0FCxe/ducf78ebFnzx7xySeftPT/GrIRDH6yOXfddVe9G5wPHDhQzJkz56bH7dmzR2g0GlFVVWWyvUuXLuKdd94RQgjh4eEh1q9f3+Dx69atE56envW2NxT8CxYsMD5PS0sTAMTatWuN2z799FPh7Ox803p79eolVq1a1ejnCCHE+PHjxb333muy7W9/+5vo2bOnyXGjR4822efZZ58V99xzjzAYDDetgdontnrIJvXp08fkeVBQUL07E/1RZmYmysrK4OPjA3d3d+Pj3LlzxrZLcnIypkyZgoSEBLz22msm7Rhz66u7B2xUVJTJtqqqKmi1WgBAWVkZZs+ejcjISHh5ecHd3R0nTpww3smrMSdOnMDgwYNNtg0ePBinTp2CXq83bvv9zbkBYPLkycjIyECPHj3w3HPPYdu2bWZ9T7JNPLlLNsnR0dHkuUKhMN41qjFlZWUICgrCrl276r1WN1tn8eLFGD9+PL799lt89913WLRoETZs2IAxY8aYXV9dP76hbXU1z549G9u3b8frr7+Orl27wsXFBQ8//DB0Ol2zPrcxbm5uJs8HDBiAc+fO4bvvvsMPP/yARx99FAkJCfj8888t8nlk3Rj8ZDcGDBiAvLw8ODg4ICIiotH9unfvju7du2PWrFkYN24c1q1bhzFjxkCtVpuMoi1p3759mDx5svEfmLKyMpw/f95kn4Y+PzIyEvv27av3Xt27d4dKpbrpZ2o0GowdOxZjx47Fww8/jOHDh6O4uBje3t4t/0Jk1djqIbuRkJCA+Ph4jB49Gtu2bcP58+fx008/Yf78+Th48CAqKysxffp07Nq1CxcuXMC+ffuQnp6OyMhIANLsmLKyMqSmpqKoqAgVFRUWq61bt27YvHkzMjIykJmZifHjx9f7DSYiIgK7d+9Gbm4uioqKAAB//etfkZqaipdffhnZ2dn44IMP8Pbbb2P27Nk3/bwVK1bg008/xcmTJ5GdnY3PPvsMgYGBDV6nQO0Pg5/shkKhwJYtW/CnP/0JSUlJ6N69Ox577DFcuHABAQEBUKlUuHLlCiZOnIju3bvj0UcfxYgRI/DSSy8BAAYNGoSpU6di7Nix8PPzw7JlyyxW24oVK9ChQwcMGjQIo0aNQmJiIgYMGGCyz5IlS3D+/Hl06dLFeO3AgAEDsGnTJmzYsAG9e/fGwoULsWTJEkyePPmmn+fh4YFly5YhJiYGAwcOxPnz57Fly5Z6N16n9om3XiQisjP8552IyM4w+Knd+Pjjj02maf7+0atXL7nLI7IabPVQu1FaWor8/PwGX3N0dER4eHgbV0RknRj8RER2hq0eIiI7w+AnIrIzDH4iIjvD4CcisjMMfiIiO8PgJyKyMwx+IiI7w+AnIrIz/w8ULKLnAfuaRQAAAABJRU5ErkJggg==\n"
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "steps = np.arange(1, MAX_ESTIMATORS, 1)\n",
+ "srfc = StreamingRFC(n_estimators_per_chunk=1,\n",
+ " max_n_estimators=np.max(steps))\n",
+ "\n",
+ "%time train_scores, test_scores = inc_partial_fit(x, y, srfc=srfc, steps=steps, sample=1)\n",
+ "print(f\"With {len(srfc.estimators_)}: {train_scores[-1]} | {test_scores[-1]}\")\n",
+ "plot_auc(steps, train_scores, test_scores)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "#### Partial random forest\n",
+ "1 estimator per 10 % subset"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "steps = np.arange(1, MAX_ESTIMATORS, 1)\n",
+ "srfc = StreamingRFC(n_estimators_per_chunk=1,\n",
+ " max_n_estimators=np.max(steps))\n",
+ "\n",
+ "%time train_scores, test_scores = inc_partial_fit(x, y, srfc=srfc, steps=steps, sample=0.1)\n",
+ "print(f\"With {len(srfc.estimators_)}: {train_scores[-1]} | {test_scores[-1]}\")\n",
+ "plot_auc(steps, train_scores, test_scores)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "#### Partial random forest\n",
+ "3 estimators per 10 % subset, but only a third as many steps"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "steps = np.arange(1, MAX_ESTIMATORS, 3)\n",
+ "srfc = StreamingRFC(n_estimators_per_chunk=3,\n",
+ " max_n_estimators=np.max(steps))\n",
+ "\n",
+ "%time train_scores, test_scores = inc_partial_fit(x, y, srfc=srfc, steps=steps, sample=0.1)\n",
+ "print(f\"With {len(srfc.estimators_)}: {train_scores[-1]} | {test_scores[-1]}\")\n",
+ "plot_auc(steps, train_scores, test_scores)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "#### Forest of partial decision trees\n",
+ "1 estimator per 10 % subset with all features"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "steps = np.arange(1, MAX_ESTIMATORS, 1)\n",
+ "srfc = StreamingRFC(n_estimators_per_chunk=1,\n",
+ " max_n_estimators=np.max(steps),\n",
+ " max_features=x.shape[1])\n",
+ "\n",
+ "%time train_scores, test_scores = inc_partial_fit(x, y, srfc=srfc, steps=steps, sample=0.1)\n",
+ "print(f\"With {len(srfc.estimators_)}: {train_scores[-1]} | {test_scores[-1]}\")\n",
+ "plot_auc(steps, train_scores, test_scores)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "# Optimised parameters\n",
+ "\n",
+ "Using a better set of parameters for this dataset"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "from sklearn.model_selection import RandomizedSearchCV as RCV\n",
+ "\n",
+ "grid = RCV(RandomForestClassifier(n_estimators=30, \n",
+ " n_jobs=-1),\n",
+ " param_distributions={'min_samples_leaf': [1, 2, 10, 30, 60, 120, 240, 480],\n",
+ " 'min_samples_split': [2, 10, 30, 60, 120, 240, 480],\n",
+ " 'min_impurity_decrease': [0, 0.05, 0.1, 0.2, 0.3]},\n",
+ " cv=3,\n",
+ " n_iter=100,\n",
+ " verbose=10,\n",
+ " n_jobs=-1)\n",
+ "\n",
+ "x_train, x_test, y_train, y_test = train_test_split(x, y, \n",
+ " test_size=0.25,\n",
+ " random_state=1)\n",
+ "\n",
+ "grid.fit(x_train, y_train)\n",
+ "print(grid.best_estimator_.get_params(deep=True))\n",
+ "\n",
+ "tr_score, te_score = score(grid,\n",
+ " train=(x_train, y_train),\n",
+ " test=(x_test, y_test),\n",
+ " pr=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "params = grid.best_estimator_.get_params()\n",
+ "params.pop('warm_start', None)\n",
+ "params.pop('n_jobs', None)\n",
+ "params.pop('n_estimators', None)\n",
+ "params"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "### Standard random forest"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "steps = np.arange(1, MAX_ESTIMATORS, 10)\n",
+ "\n",
+ "rfc = RandomForestClassifier(warm_start=True,\n",
+ " **params)\n",
+ "\n",
+ "%time train_scores, test_scores = inc_fit(x, y, rfc=rfc, steps=steps)\n",
+ "print(f\"With {len(rfc.estimators_)}: {train_scores[-1]} | {test_scores[-1]}\")\n",
+ "plot_auc(steps, train_scores, test_scores)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "#### As normal random forest\n",
+ "1 estimator per full subset"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "steps = np.arange(1, MAX_ESTIMATORS, 1)\n",
+ "srfc = StreamingRFC(n_estimators_per_chunk=1,\n",
+ " max_n_estimators=np.max(steps),\n",
+ " **params)\n",
+ "\n",
+ "%time train_scores, test_scores = inc_partial_fit(x, y, srfc=srfc, steps=steps, sample=1)\n",
+ "print(f\"With {len(srfc.estimators_)}: {train_scores[-1]} | {test_scores[-1]}\")\n",
+ "plot_auc(steps, train_scores, test_scores)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "#### Partial random forest\n",
+ "1 estimator per 10 % subset"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "steps = np.arange(1, MAX_ESTIMATORS, 1)\n",
+ "srfc = StreamingRFC(n_estimators=1,\n",
+ " max_n_estimators=np.max(steps),\n",
+ " **params)\n",
+ "\n",
+ "%time train_scores, test_scores = inc_partial_fit(x, y, srfc=srfc, steps=steps, sample=0.1)\n",
+ "print(f\"With {len(srfc.estimators_)}: {train_scores[-1]} | {test_scores[-1]}\")\n",
+ "plot_auc(steps, train_scores, test_scores)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "#### Partial random forest\n",
+ "3 estimators per 10 % subset, but only a third as many steps"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "steps = np.arange(1, MAX_ESTIMATORS, 3)\n",
+ "srfc = StreamingRFC(n_estimators=3,\n",
+ " max_n_estimators=np.max(steps),\n",
+ " **params)\n",
+ "\n",
+ "%time train_scores, test_scores = inc_partial_fit(x, y, srfc=srfc, steps=steps, sample=0.1)\n",
+ "print(f\"With {len(srfc.estimators_)}: {train_scores[-1]} | {test_scores[-1]}\")\n",
+ "plot_auc(steps, train_scores, test_scores)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "#### Forest of partial decision trees\n",
+ "1 estimator per 10 % subset with all features"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "params.pop('max_features', None)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "steps = np.arange(1, MAX_ESTIMATORS, 1)\n",
+ "srfc = StreamingRFC(n_estimators=1,\n",
+ " max_n_estimators=np.max(steps),\n",
+ " max_features=x.shape[1],\n",
+ " **params)\n",
+ "\n",
+ "%time train_scores, test_scores = inc_partial_fit(x, y, srfc=srfc, steps=steps, sample=0.1)\n",
+ "print(f\"With {len(srfc.estimators_)}: {train_scores[-1]} | {test_scores[-1]}\")\n",
+ "plot_auc(steps, train_scores, test_scores)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.6.7"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
\ No newline at end of file
diff --git a/scripts/PerformanceComparisonsDask.ipynb b/scripts/PerformanceComparisonsDask.ipynb
new file mode 100644
index 0000000..e130615
--- /dev/null
+++ b/scripts/PerformanceComparisonsDask.ipynb
@@ -0,0 +1,649 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "# Performance comparisons\n",
+ "\n",
+ "In memory and out of memory, using dask."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "The autoreload extension is already loaded. To reload it, use:\n",
+ " %reload_ext autoreload\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Change dir to repo root if running from repo (rather than pip installed)\n",
+ "# (Assuming running from [repo]/scripts/)\n",
+ "import os\n",
+ "os.chdir('../')\n",
+ "\n",
+ "%load_ext autoreload\n",
+ "%autoreload 2"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "import matplotlib.pyplot as plt\n",
+ "import numpy as np\n",
+ "\n",
+ "from typing import Tuple\n",
+ "\n",
+ "from incremental_trees.trees import StreamingRFC\n",
+ "\n",
+ "from sklearn.ensemble import RandomForestClassifier\n",
+ "from sklearn.model_selection import train_test_split\n",
+ "from sklearn.metrics import roc_auc_score\n",
+ "\n",
+ "import dask_ml.datasets\n",
+ "from dask_ml.wrappers import Incremental\n",
+ "from dask.distributed import Client, LocalCluster\n",
+ "from dask_ml.model_selection import train_test_split as dask_tts\n",
+ "\n",
+ "import dask as dd\n",
+ "import pandas as pd"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "# Settings\n",
+ "MAX_ESTIMATORS = 60 # Lower to run faster"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "2023-01-01 21:04:41,365 - distributed.deploy.spec - WARNING - Cluster closed without starting up\n"
+ ]
+ },
+ {
+ "ename": "RuntimeError",
+ "evalue": "Cluster failed to start: Scheduler failed to start.",
+ "output_type": "error",
+ "traceback": [
+ "\u001B[1;31m---------------------------------------------------------------------------\u001B[0m",
+ "\u001B[1;31mOSError\u001B[0m Traceback (most recent call last)",
+ "File \u001B[1;32m~\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\distributed\\core.py:524\u001B[0m, in \u001B[0;36mServer.start\u001B[1;34m(self)\u001B[0m\n\u001B[0;32m 523\u001B[0m \u001B[38;5;28;01mtry\u001B[39;00m:\n\u001B[1;32m--> 524\u001B[0m \u001B[38;5;28;01mawait\u001B[39;00m asyncio\u001B[38;5;241m.\u001B[39mwait_for(\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mstart_unsafe(), timeout\u001B[38;5;241m=\u001B[39mtimeout)\n\u001B[0;32m 525\u001B[0m \u001B[38;5;28;01mexcept\u001B[39;00m asyncio\u001B[38;5;241m.\u001B[39mTimeoutError \u001B[38;5;28;01mas\u001B[39;00m exc:\n",
+ "File \u001B[1;32m~\\anaconda3\\envs\\IncrementalTrees\\lib\\asyncio\\tasks.py:442\u001B[0m, in \u001B[0;36mwait_for\u001B[1;34m(fut, timeout, loop)\u001B[0m\n\u001B[0;32m 441\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m timeout \u001B[38;5;129;01mis\u001B[39;00m \u001B[38;5;28;01mNone\u001B[39;00m:\n\u001B[1;32m--> 442\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28;01mawait\u001B[39;00m fut\n\u001B[0;32m 444\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m timeout \u001B[38;5;241m<\u001B[39m\u001B[38;5;241m=\u001B[39m \u001B[38;5;241m0\u001B[39m:\n",
+ "File \u001B[1;32m~\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\distributed\\scheduler.py:3880\u001B[0m, in \u001B[0;36mScheduler.start_unsafe\u001B[1;34m(self)\u001B[0m\n\u001B[0;32m 3879\u001B[0m \u001B[38;5;28;01mfor\u001B[39;00m addr \u001B[38;5;129;01min\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_start_address:\n\u001B[1;32m-> 3880\u001B[0m \u001B[38;5;28;01mawait\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mlisten(\n\u001B[0;32m 3881\u001B[0m addr,\n\u001B[0;32m 3882\u001B[0m allow_offload\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mFalse\u001B[39;00m,\n\u001B[0;32m 3883\u001B[0m handshake_overrides\u001B[38;5;241m=\u001B[39m{\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mpickle-protocol\u001B[39m\u001B[38;5;124m\"\u001B[39m: \u001B[38;5;241m4\u001B[39m, \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mcompression\u001B[39m\u001B[38;5;124m\"\u001B[39m: \u001B[38;5;28;01mNone\u001B[39;00m},\n\u001B[0;32m 3884\u001B[0m \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39m\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39msecurity\u001B[38;5;241m.\u001B[39mget_listen_args(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mscheduler\u001B[39m\u001B[38;5;124m\"\u001B[39m),\n\u001B[0;32m 3885\u001B[0m )\n\u001B[0;32m 3886\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mip \u001B[38;5;241m=\u001B[39m get_address_host(\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mlisten_address)\n",
+ "File \u001B[1;32m~\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\distributed\\core.py:707\u001B[0m, in \u001B[0;36mServer.listen\u001B[1;34m(self, port_or_addr, allow_offload, **kwargs)\u001B[0m\n\u001B[0;32m 706\u001B[0m \u001B[38;5;28;01massert\u001B[39;00m \u001B[38;5;28misinstance\u001B[39m(addr, \u001B[38;5;28mstr\u001B[39m)\n\u001B[1;32m--> 707\u001B[0m listener \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mawait\u001B[39;00m listen(\n\u001B[0;32m 708\u001B[0m addr,\n\u001B[0;32m 709\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mhandle_comm,\n\u001B[0;32m 710\u001B[0m deserialize\u001B[38;5;241m=\u001B[39m\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mdeserialize,\n\u001B[0;32m 711\u001B[0m allow_offload\u001B[38;5;241m=\u001B[39mallow_offload,\n\u001B[0;32m 712\u001B[0m \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs,\n\u001B[0;32m 713\u001B[0m )\n\u001B[0;32m 714\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mlisteners\u001B[38;5;241m.\u001B[39mappend(listener)\n",
+ "File \u001B[1;32m~\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\distributed\\comm\\core.py:212\u001B[0m, in \u001B[0;36mListener.__await__.._\u001B[1;34m()\u001B[0m\n\u001B[0;32m 211\u001B[0m \u001B[38;5;28;01masync\u001B[39;00m \u001B[38;5;28;01mdef\u001B[39;00m \u001B[38;5;21m_\u001B[39m():\n\u001B[1;32m--> 212\u001B[0m \u001B[38;5;28;01mawait\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mstart()\n\u001B[0;32m 213\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28mself\u001B[39m\n",
+ "File \u001B[1;32m~\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\distributed\\comm\\tcp.py:580\u001B[0m, in \u001B[0;36mBaseTCPListener.start\u001B[1;34m(self)\u001B[0m\n\u001B[0;32m 575\u001B[0m \u001B[38;5;28;01mtry\u001B[39;00m:\n\u001B[0;32m 576\u001B[0m \u001B[38;5;66;03m# When shuffling data between workers, there can\u001B[39;00m\n\u001B[0;32m 577\u001B[0m \u001B[38;5;66;03m# really be O(cluster size) connection requests\u001B[39;00m\n\u001B[0;32m 578\u001B[0m \u001B[38;5;66;03m# on a single worker socket, make sure the backlog\u001B[39;00m\n\u001B[0;32m 579\u001B[0m \u001B[38;5;66;03m# is large enough not to lose any.\u001B[39;00m\n\u001B[1;32m--> 580\u001B[0m sockets \u001B[38;5;241m=\u001B[39m \u001B[43mnetutil\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mbind_sockets\u001B[49m\u001B[43m(\u001B[49m\n\u001B[0;32m 581\u001B[0m \u001B[43m \u001B[49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mport\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43maddress\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mip\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mbacklog\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mbacklog\u001B[49m\n\u001B[0;32m 582\u001B[0m \u001B[43m \u001B[49m\u001B[43m)\u001B[49m\n\u001B[0;32m 583\u001B[0m \u001B[38;5;28;01mexcept\u001B[39;00m \u001B[38;5;167;01mOSError\u001B[39;00m \u001B[38;5;28;01mas\u001B[39;00m e:\n\u001B[0;32m 584\u001B[0m \u001B[38;5;66;03m# EADDRINUSE can happen sporadically when trying to bind\u001B[39;00m\n\u001B[0;32m 585\u001B[0m \u001B[38;5;66;03m# to an ephemeral port\u001B[39;00m\n",
+ "File \u001B[1;32m~\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\tornado\\netutil.py:162\u001B[0m, in \u001B[0;36mbind_sockets\u001B[1;34m(port, address, family, backlog, flags, reuse_port)\u001B[0m\n\u001B[0;32m 161\u001B[0m \u001B[38;5;28;01mtry\u001B[39;00m:\n\u001B[1;32m--> 162\u001B[0m \u001B[43msock\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mbind\u001B[49m\u001B[43m(\u001B[49m\u001B[43msockaddr\u001B[49m\u001B[43m)\u001B[49m\n\u001B[0;32m 163\u001B[0m \u001B[38;5;28;01mexcept\u001B[39;00m \u001B[38;5;167;01mOSError\u001B[39;00m \u001B[38;5;28;01mas\u001B[39;00m e:\n",
+ "\u001B[1;31mOSError\u001B[0m: [WinError 10048] Only one usage of each socket address (protocol/network address/port) is normally permitted",
+ "\nThe above exception was the direct cause of the following exception:\n",
+ "\u001B[1;31mRuntimeError\u001B[0m Traceback (most recent call last)",
+ "File \u001B[1;32m~\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\distributed\\deploy\\spec.py:309\u001B[0m, in \u001B[0;36mSpecCluster._start\u001B[1;34m(self)\u001B[0m\n\u001B[0;32m 308\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mscheduler \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mcls\u001B[39m(\u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39m\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mscheduler_spec\u001B[38;5;241m.\u001B[39mget(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124moptions\u001B[39m\u001B[38;5;124m\"\u001B[39m, {}))\n\u001B[1;32m--> 309\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mscheduler \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mawait\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mscheduler\n\u001B[0;32m 310\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mscheduler_comm \u001B[38;5;241m=\u001B[39m rpc(\n\u001B[0;32m 311\u001B[0m \u001B[38;5;28mgetattr\u001B[39m(\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mscheduler, \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mexternal_address\u001B[39m\u001B[38;5;124m\"\u001B[39m, \u001B[38;5;28;01mNone\u001B[39;00m)\n\u001B[0;32m 312\u001B[0m \u001B[38;5;129;01mor\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mscheduler\u001B[38;5;241m.\u001B[39maddress,\n\u001B[0;32m 313\u001B[0m connection_args\u001B[38;5;241m=\u001B[39m\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39msecurity\u001B[38;5;241m.\u001B[39mget_connection_args(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mclient\u001B[39m\u001B[38;5;124m\"\u001B[39m),\n\u001B[0;32m 314\u001B[0m )\n",
+ "File \u001B[1;32m~\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\distributed\\core.py:532\u001B[0m, in \u001B[0;36mServer.start\u001B[1;34m(self)\u001B[0m\n\u001B[0;32m 531\u001B[0m \u001B[38;5;28;01mawait\u001B[39;00m _close_on_failure(exc)\n\u001B[1;32m--> 532\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mRuntimeError\u001B[39;00m(\u001B[38;5;124mf\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;132;01m{\u001B[39;00m\u001B[38;5;28mtype\u001B[39m(\u001B[38;5;28mself\u001B[39m)\u001B[38;5;241m.\u001B[39m\u001B[38;5;18m__name__\u001B[39m\u001B[38;5;132;01m}\u001B[39;00m\u001B[38;5;124m failed to start.\u001B[39m\u001B[38;5;124m\"\u001B[39m) \u001B[38;5;28;01mfrom\u001B[39;00m \u001B[38;5;21;01mexc\u001B[39;00m\n\u001B[0;32m 533\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mstatus \u001B[38;5;241m=\u001B[39m Status\u001B[38;5;241m.\u001B[39mrunning\n",
+ "\u001B[1;31mRuntimeError\u001B[0m: Scheduler failed to start.",
+ "\nThe above exception was the direct cause of the following exception:\n",
+ "\u001B[1;31mRuntimeError\u001B[0m Traceback (most recent call last)",
+ "Cell \u001B[1;32mIn[26], line 2\u001B[0m\n\u001B[0;32m 1\u001B[0m \u001B[38;5;66;03m# Prepare dask cluster\u001B[39;00m\n\u001B[1;32m----> 2\u001B[0m cluster \u001B[38;5;241m=\u001B[39m \u001B[43mLocalCluster\u001B[49m\u001B[43m(\u001B[49m\n\u001B[0;32m 3\u001B[0m \u001B[43m \u001B[49m\u001B[43mprocesses\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[38;5;28;43;01mFalse\u001B[39;49;00m\u001B[43m,\u001B[49m\n\u001B[0;32m 4\u001B[0m \u001B[43m \u001B[49m\u001B[43mn_workers\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[38;5;241;43m2\u001B[39;49m\u001B[43m,\u001B[49m\n\u001B[0;32m 5\u001B[0m \u001B[43m \u001B[49m\u001B[43mthreads_per_worker\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[38;5;241;43m2\u001B[39;49m\u001B[43m,\u001B[49m\n\u001B[0;32m 6\u001B[0m \u001B[43m \u001B[49m\u001B[43mscheduler_port\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[38;5;241;43m8383\u001B[39;49m\n\u001B[0;32m 7\u001B[0m \u001B[43m)\u001B[49m\n\u001B[0;32m 8\u001B[0m client \u001B[38;5;241m=\u001B[39m Client(cluster)\n\u001B[0;32m 9\u001B[0m client\n",
+ "File \u001B[1;32m~\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\distributed\\deploy\\local.py:253\u001B[0m, in \u001B[0;36mLocalCluster.__init__\u001B[1;34m(self, name, n_workers, threads_per_worker, processes, loop, start, host, ip, scheduler_port, silence_logs, dashboard_address, worker_dashboard_address, diagnostics_port, services, worker_services, service_kwargs, asynchronous, security, protocol, blocked_handlers, interface, worker_class, scheduler_kwargs, scheduler_sync_interval, **worker_kwargs)\u001B[0m\n\u001B[0;32m 250\u001B[0m worker \u001B[38;5;241m=\u001B[39m {\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mcls\u001B[39m\u001B[38;5;124m\"\u001B[39m: worker_class, \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124moptions\u001B[39m\u001B[38;5;124m\"\u001B[39m: worker_kwargs}\n\u001B[0;32m 251\u001B[0m workers \u001B[38;5;241m=\u001B[39m {i: worker \u001B[38;5;28;01mfor\u001B[39;00m i \u001B[38;5;129;01min\u001B[39;00m \u001B[38;5;28mrange\u001B[39m(n_workers)}\n\u001B[1;32m--> 253\u001B[0m \u001B[38;5;28;43msuper\u001B[39;49m\u001B[43m(\u001B[49m\u001B[43m)\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[38;5;21;43m__init__\u001B[39;49m\u001B[43m(\u001B[49m\n\u001B[0;32m 254\u001B[0m \u001B[43m \u001B[49m\u001B[43mname\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mname\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 255\u001B[0m \u001B[43m \u001B[49m\u001B[43mscheduler\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mscheduler\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 256\u001B[0m \u001B[43m \u001B[49m\u001B[43mworkers\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mworkers\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 257\u001B[0m \u001B[43m \u001B[49m\u001B[43mworker\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mworker\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 258\u001B[0m \u001B[43m \u001B[49m\u001B[43mloop\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mloop\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 259\u001B[0m 
\u001B[43m \u001B[49m\u001B[43masynchronous\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43masynchronous\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 260\u001B[0m \u001B[43m \u001B[49m\u001B[43msilence_logs\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43msilence_logs\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 261\u001B[0m \u001B[43m \u001B[49m\u001B[43msecurity\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43msecurity\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 262\u001B[0m \u001B[43m \u001B[49m\u001B[43mscheduler_sync_interval\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mscheduler_sync_interval\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 263\u001B[0m \u001B[43m\u001B[49m\u001B[43m)\u001B[49m\n",
+ "File \u001B[1;32m~\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\distributed\\deploy\\spec.py:275\u001B[0m, in \u001B[0;36mSpecCluster.__init__\u001B[1;34m(self, workers, scheduler, worker, asynchronous, loop, security, silence_logs, name, shutdown_on_close, scheduler_sync_interval)\u001B[0m\n\u001B[0;32m 273\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;129;01mnot\u001B[39;00m called_from_running_loop:\n\u001B[0;32m 274\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_loop_runner\u001B[38;5;241m.\u001B[39mstart()\n\u001B[1;32m--> 275\u001B[0m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43msync\u001B[49m\u001B[43m(\u001B[49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43m_start\u001B[49m\u001B[43m)\u001B[49m\n\u001B[0;32m 276\u001B[0m \u001B[38;5;28;01mtry\u001B[39;00m:\n\u001B[0;32m 277\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39msync(\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_correct_state)\n",
+ "File \u001B[1;32m~\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\distributed\\utils.py:339\u001B[0m, in \u001B[0;36mSyncMethodMixin.sync\u001B[1;34m(self, func, asynchronous, callback_timeout, *args, **kwargs)\u001B[0m\n\u001B[0;32m 337\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m future\n\u001B[0;32m 338\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[1;32m--> 339\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m sync(\n\u001B[0;32m 340\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mloop, func, \u001B[38;5;241m*\u001B[39margs, callback_timeout\u001B[38;5;241m=\u001B[39mcallback_timeout, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs\n\u001B[0;32m 341\u001B[0m )\n",
+ "File \u001B[1;32m~\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\distributed\\utils.py:406\u001B[0m, in \u001B[0;36msync\u001B[1;34m(loop, func, callback_timeout, *args, **kwargs)\u001B[0m\n\u001B[0;32m 404\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m error:\n\u001B[0;32m 405\u001B[0m typ, exc, tb \u001B[38;5;241m=\u001B[39m error\n\u001B[1;32m--> 406\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m exc\u001B[38;5;241m.\u001B[39mwith_traceback(tb)\n\u001B[0;32m 407\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[0;32m 408\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m result\n",
+ "File \u001B[1;32m~\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\distributed\\utils.py:379\u001B[0m, in \u001B[0;36msync..f\u001B[1;34m()\u001B[0m\n\u001B[0;32m 377\u001B[0m future \u001B[38;5;241m=\u001B[39m asyncio\u001B[38;5;241m.\u001B[39mwait_for(future, callback_timeout)\n\u001B[0;32m 378\u001B[0m future \u001B[38;5;241m=\u001B[39m asyncio\u001B[38;5;241m.\u001B[39mensure_future(future)\n\u001B[1;32m--> 379\u001B[0m result \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01myield\u001B[39;00m future\n\u001B[0;32m 380\u001B[0m \u001B[38;5;28;01mexcept\u001B[39;00m \u001B[38;5;167;01mException\u001B[39;00m:\n\u001B[0;32m 381\u001B[0m error \u001B[38;5;241m=\u001B[39m sys\u001B[38;5;241m.\u001B[39mexc_info()\n",
+ "File \u001B[1;32m~\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\tornado\\gen.py:769\u001B[0m, in \u001B[0;36mRunner.run\u001B[1;34m(self)\u001B[0m\n\u001B[0;32m 766\u001B[0m exc_info \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mNone\u001B[39;00m\n\u001B[0;32m 768\u001B[0m \u001B[38;5;28;01mtry\u001B[39;00m:\n\u001B[1;32m--> 769\u001B[0m value \u001B[38;5;241m=\u001B[39m \u001B[43mfuture\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mresult\u001B[49m\u001B[43m(\u001B[49m\u001B[43m)\u001B[49m\n\u001B[0;32m 770\u001B[0m \u001B[38;5;28;01mexcept\u001B[39;00m \u001B[38;5;167;01mException\u001B[39;00m:\n\u001B[0;32m 771\u001B[0m exc_info \u001B[38;5;241m=\u001B[39m sys\u001B[38;5;241m.\u001B[39mexc_info()\n",
+ "File \u001B[1;32m~\\anaconda3\\envs\\IncrementalTrees\\lib\\site-packages\\distributed\\deploy\\spec.py:319\u001B[0m, in \u001B[0;36mSpecCluster._start\u001B[1;34m(self)\u001B[0m\n\u001B[0;32m 317\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mstatus \u001B[38;5;241m=\u001B[39m Status\u001B[38;5;241m.\u001B[39mfailed\n\u001B[0;32m 318\u001B[0m \u001B[38;5;28;01mawait\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_close()\n\u001B[1;32m--> 319\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mRuntimeError\u001B[39;00m(\u001B[38;5;124mf\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mCluster failed to start: \u001B[39m\u001B[38;5;132;01m{\u001B[39;00me\u001B[38;5;132;01m}\u001B[39;00m\u001B[38;5;124m\"\u001B[39m) \u001B[38;5;28;01mfrom\u001B[39;00m \u001B[38;5;21;01me\u001B[39;00m\n",
+ "\u001B[1;31mRuntimeError\u001B[0m: Cluster failed to start: Scheduler failed to start."
+ ]
+ }
+ ],
+ "source": [
+ "# Prepare dask cluster\n",
+ "cluster = LocalCluster(\n",
+ " processes=False,\n",
+ " n_workers=2,\n",
+ " threads_per_worker=2,\n",
+ " scheduler_port=8383\n",
+ ")\n",
+ "client = Client(cluster)\n",
+ "client"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "# Synthetic data, in memory\n",
+ "\n",
+ "Compare increasing estimators with RandomForest (using warm_start) against Incremental StreamingRFC (dask handles .partial_fit)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "x, y = dask_ml.datasets.make_blobs(\n",
+ " n_samples=1e5,\n",
+ " chunks=1e4,\n",
+ " random_state=0,\n",
+ " n_features=40,\n",
+ " centers=2,\n",
+ " cluster_std=100\n",
+ ")\n",
+ "\n",
+ "x_dd = dd.dataframe.from_array(x, chunksize=1e4)\n",
+ "y_dd = dd.dataframe.from_array(y, chunksize=1e4)\n",
+ "\n",
+ "x_pd = pd.DataFrame(x.persist().compute())\n",
+ "y_pd = pd.DataFrame(y.persist().compute())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "x_pd.memory_usage(deep=True).sum() / 1024 / 1024"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "### Standard random forest"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+    "def score(mod, train: Tuple[np.array, np.array], test: Tuple[np.array, np.array], pr=False) -> Tuple[float, float]:\n",
+    "    \"\"\"Return (train, test) ROC AUC of mod on the (x, y) pairs in train and test; print them if pr is True.\"\"\"\n",
+    "\n",
+    "    # Column 1 of predict_proba is used as the score passed to roc_auc_score.\n",
+    "    roc_train = roc_auc_score(train[1], mod.predict_proba(train[0])[:, 1])\n",
+    "    roc_test = roc_auc_score(test[1], mod.predict_proba(test[0])[:, 1])\n",
+    "\n",
+    "    if pr:\n",
+    "        # Report the model being scored, not whatever global `rfc` happens to exist.\n",
+    "        print(f\"n_ests: {len(mod.estimators_)}\")\n",
+    "        print(f'Train AUC: {roc_train}')\n",
+    "        print(f'Test AUC: {roc_test}')\n",
+    "\n",
+    "    return roc_train, roc_test\n",
+ "\n",
+ "\n",
+    "def score_dask(mod, train: Tuple[np.array, np.array], test: Tuple[np.array, np.array], pr=False) -> Tuple[float, float]:\n",
+    "    \"\"\"Return (train, test) values of mod.score(x, y) for the (x, y) pairs given; print them if pr is True.\"\"\"\n",
+    "\n",
+    "    score_train = mod.score(train[0], train[1])\n",
+    "    score_test = mod.score(test[0], test[1])\n",
+    "    if pr:\n",
+    "        print(f\"n_ests: {len(mod.estimators_)}\")\n",
+    "        print(f'Train score: {score_train}')\n",
+    "        print(f'Test score: {score_test}')\n",
+    "\n",
+    "    return score_train, score_test\n",
+ "\n",
+ "\n",
+    "def multiple_fit(x: np.array, y: np.array, steps=np.arange(1, 101, 2), sample: int = 1):\n",
+    "    \"\"\"\n",
+    "    Fit a random forest model with an increasing number of estimators.\n",
+    "\n",
+    "    This version doesn't use warm start and refits the model from scratch each iteration.\n",
+    "    This is for the sake of comparing timings to dask function below.\n",
+    "\n",
+    "    :param x: Features; split 75/25 into train/test with a fixed random_state.\n",
+    "    :param y: Labels, split alongside x.\n",
+    "    :param steps: Values of n_estimators to iterate over; one model is fit and scored per value.\n",
+    "    :param sample: Currently unused - every fit uses the whole training split.\n",
+    "    :return: Tuple of (last fitted model or None if steps is empty, train scores, test scores).\n",
+    "    \"\"\"\n",
+    "\n",
+    "    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=1)\n",
+    "    rfc = None  # Stays None when steps is empty, rather than failing at the return\n",
+    "    train_scores = []\n",
+    "    test_scores = []\n",
+    "    for s in steps:\n",
+    "        # Fit full model on each iteration\n",
+    "        rfc = RandomForestClassifier(warm_start=False)\n",
+    "\n",
+    "        # Fit model with these n ests\n",
+    "        rfc.set_params(n_estimators=s)\n",
+    "        rfc.fit(x_train, y_train)\n",
+    "\n",
+    "        tr_score, te_score = score(rfc, train=(x_train, y_train), test=(x_test, y_test), pr=False)\n",
+    "\n",
+    "        train_scores.append(tr_score)\n",
+    "        test_scores.append(te_score)\n",
+    "\n",
+    "    return rfc, train_scores, test_scores\n",
+ "\n",
+ "\n",
+    "def plot_auc(steps, train_scores, test_scores):\n",
+    "    \"\"\"Draw the train and test auc curves against the number of estimators on a new 4x4 figure.\"\"\"\n",
+    "\n",
+    "    plt.figure(figsize=(4, 4))\n",
+    "    for curve in (train_scores, test_scores):\n",
+    "        plt.plot(steps, curve)\n",
+    "    plt.xlabel('n_estimators')\n",
+    "    plt.ylabel('auc')\n",
+    "    plt.legend(['train', 'test'])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "steps = np.arange(1, MAX_ESTIMATORS, 4)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "rfc = RandomForestClassifier(warm_start=True)\n",
+ "\n",
+ "%time rfc, train_scores, test_scores = multiple_fit(x_pd.values, y_pd.values.squeeze(), steps=steps)\n",
+ "\n",
+ "print(f\"With {len(rfc.estimators_)}: {train_scores[-1]} | {test_scores[-1]}\")\n",
+ "plot_auc(steps, train_scores, test_scores)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "## Single incremental forest specs"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "x_train, x_test, y_train, y_test = dask_tts(x, y, test_size=0.25)\n",
+ "\n",
+ "x_train.chunks"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "#### Incremental forest\n",
+ "1 estimator per subset, 10 % per chunk, 1 pass through data.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "srfc = Incremental(StreamingRFC(n_estimators_per_chunk=1, max_n_estimators=np.inf))\n",
+ "\n",
+ "srfc.fit(x_train, y_train, classes=[0, 1])\n",
+ "\n",
+ "tr_score, te_score = score(srfc, train=(x_train, y_train), test=(x_test, y_test), pr=True)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "#### Incremental forest\n",
+ "20 estimators per subset (different features), 10 % per chunk, 1 pass through data."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "srfc = Incremental(StreamingRFC(n_estimators_per_chunk=20, max_n_estimators=np.inf))\n",
+ "\n",
+ "srfc.fit(x_train, y_train, classes=[0, 1])\n",
+ "\n",
+ "tr_score, te_score = score(srfc, train=(x_train, y_train), test=(x_test, y_test), pr=True)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "#### Forest of partial decision trees\n",
+ "1 estimator per subset with all features, 10 % per chunk, 1 pass through data."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "srfc = Incremental(StreamingRFC(\n",
+ " n_estimators_per_chunk=1,\n",
+ " max_n_estimators=np.max(steps),\n",
+ " max_features=x.shape[1])\n",
+ ")\n",
+ "\n",
+ "srfc.fit(x_train, y_train,\n",
+ " classes=[0, 1])\n",
+ "\n",
+ "tr_score, te_score = score(srfc, train=(x_train, y_train), test=(x_test, y_test), pr=True)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "#### Forest of partial decision trees\n",
+    "20 estimators per subset with all features, 10 % per chunk, 1 pass through data.\n",
+ "\n",
+ "Extra estimators shouldn't help here?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "srfc = Incremental(StreamingRFC(\n",
+ " n_estimators_per_chunk=20,\n",
+ " max_n_estimators=np.max(steps),\n",
+ " max_features=x.shape[1])\n",
+ ")\n",
+ "\n",
+ "srfc.fit(x_train, y_train, classes=[0, 1])\n",
+ "\n",
+ "tr_score, te_score = score(srfc, train=(x_train, y_train), test=(x_test, y_test), pr=True)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "### n estimators per chunk vs performance\n",
+ "\n",
+ "Effect of increasing estimators per subset (with different set ups)\n",
+ "\n",
+    "The function here adds Incremental to the supplied model, and uses .fit to refit the full model in each iteration.\n",
+ "\n",
+ "The other functions (above and in PerformanceComparisons.ipynb) do incremental fits using warm start (either directly or via .partial_fit). \n",
+ "\n",
+ "This means the timing information cannot be directly compared!"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+    "def multiple_dask_fit(x: np.ndarray, y: np.ndarray, steps=np.arange(1, 101, 2),\n",
+    "                      **kwargs) -> tuple:\n",
+    "    \"\"\"\n",
+    "    Fit and score a dask Incremental-wrapped StreamingRFC for each value in steps.\n",
+    "\n",
+    "    A fresh model is created and fit from scratch on each iteration, so this will be slower than the\n",
+    "    other test functions. Timing this function can only be compared to other calls of this function.\n",
+    "\n",
+    "    The data passed to the Random forest fit by partial_fit is handled by dask and is sequential batches\n",
+    "    of data, rather than random samples (as used by inc_partial_fit in PerformanceComparisons.ipynb).\n",
+    "\n",
+    "    StreamingRFC.n_estimators_per_chunk: Set to the current value from steps.\n",
+    "    StreamingRFC.max_n_estimators: Always np.inf here.\n",
+    "\n",
+    "    :param x: Features, split 75/25 into train/test with dask_tts.\n",
+    "    :param y: Labels; classes are assumed to be 0 and 1 (fit is called with classes=[0, 1]).\n",
+    "    :param steps: Values of n_estimators_per_chunk to iterate over, one fresh model per value.\n",
+    "    :param kwargs: Extra keyword arguments forwarded to StreamingRFC.\n",
+    "    :return: Tuple of (last fitted Incremental model or None if steps is empty, train scores, test scores).\n",
+    "    \"\"\"\n",
+    "\n",
+    "    x_train, x_test, y_train, y_test = dask_tts(x, y, test_size=0.25)\n",
+    "\n",
+    "    srfc_ = None  # Stays None when steps is empty, rather than failing at the return\n",
+    "    train_scores = []\n",
+    "    test_scores = []\n",
+    "    for s in steps:\n",
+    "        # Create fresh model each iteration\n",
+    "        srfc_ = StreamingRFC(n_estimators_per_chunk=s, max_n_estimators=np.inf, **kwargs)\n",
+    "\n",
+    "        # Add Incremental\n",
+    "        srfc_ = Incremental(srfc_)\n",
+    "\n",
+    "        # Fit model with these n ests, from scratch each time\n",
+    "        srfc_.fit(x_train, y_train, classes=[0, 1])\n",
+    "\n",
+    "        tr_score, te_score = score(\n",
+    "            srfc_,\n",
+    "            train=(x_train, y_train),\n",
+    "            test=(x_test, y_test),\n",
+    "            pr=False)\n",
+    "\n",
+    "        train_scores.append(tr_score)\n",
+    "        test_scores.append(te_score)\n",
+    "\n",
+    "    return srfc_, train_scores, test_scores"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "#### Incremental forest\n",
+ "*range* estimators per subset (different features), 10 % per chunk, 1 pass through data."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "steps = np.arange(1, MAX_ESTIMATORS, 6)\n",
+ "\n",
+ "%time final_est, train_scores, test_scores = multiple_dask_fit(x, y, steps=steps)\n",
+ "print(f\"With {len(final_est.estimators_)}: {train_scores[-1]} | {test_scores[-1]}\")\n",
+ "plot_auc(steps, train_scores, test_scores)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "#### Forest of partial decision trees\n",
+ "*range* estimators per subset with all features, 10 % per chunk, 1 pass through data."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "steps = np.arange(1, MAX_ESTIMATORS, 6)\n",
+ "\n",
+ "%time final_est, train_scores, test_scores = multiple_dask_fit(x, y, steps=steps, max_features=x.shape[1])\n",
+ "print(f\"With {len(final_est.estimators_)}: {train_scores[-1]} | {test_scores[-1]}\")\n",
+ "plot_auc(steps, train_scores, test_scores)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.7.5"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
\ No newline at end of file
diff --git a/tests/integration/incremental_trees/__init__.py b/scripts/__init__.py
similarity index 100%
rename from tests/integration/incremental_trees/__init__.py
rename to scripts/__init__.py
diff --git a/scripts/example_dask.py b/scripts/example_dask.py
new file mode 100644
index 0000000..f443640
--- /dev/null
+++ b/scripts/example_dask.py
@@ -0,0 +1,40 @@
+import dask.dataframe as dd
+import dask_ml.cluster
+import dask_ml.datasets
+import numpy as np
+from dask.distributed import Client, LocalCluster
+from dask_ml.wrappers import Incremental
+
+from incremental_trees.models.classification.streaming_rfc import StreamingRFC
+
+
+def run_on_blobs():
+    """Fit a dask Incremental-wrapped StreamingRFC on synthetic blob data."""
+    x, y = dask_ml.datasets.make_blobs(n_samples=1e8, chunks=1e5, random_state=0, centers=3)
+    x = dd.from_array(x)
+    y = dd.from_array(y)
+
+    print(f"Rows: {x.shape[0].compute()}")
+    ests_per_chunk = 4
+    # divisions lists partition boundaries (npartitions + 1 entries), so count partitions directly.
+    chunks = x.npartitions
+    print(f"n chunks: {chunks}")
+
+    srfc = Incremental(StreamingRFC(
+        n_estimators_per_chunk=ests_per_chunk,
+        max_n_estimators=np.inf,
+        verbose=1,
+        n_jobs=4)
+    )
+    srfc.fit(x, y, classes=y.unique().compute())
+
+
+if __name__ == "__main__":
+    # Create, connect, and run on local cluster.
+    with LocalCluster(processes=False,
+                      n_workers=2,
+                      threads_per_worker=2,
+                      scheduler_port=8080,
+                      dashboard_address=":8081") as cluster, Client(cluster) as client:
+        print(client)
+        run_on_blobs()
diff --git a/scripts/example_fit.py b/scripts/example_fit.py
new file mode 100644
index 0000000..b952ea0
--- /dev/null
+++ b/scripts/example_fit.py
@@ -0,0 +1,21 @@
+import numpy as np
+from sklearn.datasets import make_blobs
+
+from incremental_trees.models.classification.streaming_rfc import StreamingRFC
+
+if __name__ == "__main__":
+    # Build an in-memory two-centre blob dataset
+    x, y = make_blobs(n_samples=int(2e5), random_state=0, n_features=40, centers=2, cluster_std=100)
+
+    # spf_n_fits: number of calls to .partial_fit(); spf_sample_prop: sampling proportion used on each call
+    n_fits, sample_prop = 30, 0.3
+    srfc = StreamingRFC(n_estimators_per_chunk=3, max_n_estimators=np.inf,
+                        spf_n_fits=n_fits, spf_sample_prop=sample_prop)
+
+    # Passing sample_weight is optional
+    weights = np.ones_like(y)
+    srfc.fit(x, y, sample_weight=weights)
+
+    # Should be n_estimators_per_chunk * spf_n_fits
+    print(len(srfc.estimators_))
+    print(srfc.score(x, y))
diff --git a/example_partial_fit.py b/scripts/example_partial_fit.py
similarity index 81%
rename from example_partial_fit.py
rename to scripts/example_partial_fit.py
index 5a21c7c..17e8b82 100644
--- a/example_partial_fit.py
+++ b/scripts/example_partial_fit.py
@@ -4,13 +4,10 @@
from incremental_trees.models.classification.streaming_rfc import StreamingRFC
if __name__ == "__main__":
- srfc = StreamingRFC(n_estimators_per_chunk=20,
- max_n_estimators=np.inf,
- n_jobs=8)
+ srfc = StreamingRFC(n_estimators_per_chunk=20, max_n_estimators=np.inf, n_jobs=8)
# Generate some data in memory
- x, y = make_blobs(n_samples=int(2e5), random_state=0, n_features=40,
- centers=2, cluster_std=100)
+ x, y = make_blobs(n_samples=int(2e5), random_state=0, n_features=40, centers=2, cluster_std=100)
# Feed .partial_fit() with random samples of the data
n_chunks = 30
diff --git a/scripts/requirements.txt b/scripts/requirements.txt
new file mode 100644
index 0000000..fba4851
--- /dev/null
+++ b/scripts/requirements.txt
@@ -0,0 +1,6 @@
+ipykernel
+matplotlib
+jupyter
+jupyterlab
+pandas
+bokeh<3
diff --git a/setup.py b/setup.py
index 60af819..a896d1f 100644
--- a/setup.py
+++ b/setup.py
@@ -8,14 +8,19 @@
setuptools.setup(name='incremental_trees',
version=__version__,
author="Gareth Jones",
- author_email="author@example.com",
+ author_email="garethgithub@gmail.com",
description='Sklearn forests with partial fits',
long_description=long_description,
long_description_content_type="text/markdown",
packages=setuptools.find_packages(),
url="https://github.com/garethjns/IncrementalTrees",
- install_requires=["scikit-learn>=0.22", "pandas",
- "dask>=2",
- "dask-glm==0.2.0",
- "dask-ml>=1",
- "bokeh"])
+ python_requires='>=3.8',
+ install_requires=[
+ "scikit-learn==1.2",
+ "pandas",
+ "numpy",
+ "dask==2022.12",
+ "dask-glm==0.2.0",
+ "dask-ml==2022.5.27",
+ "fsspec"
+ ])
diff --git a/tests/integration/base.py b/tests/integration/base.py
deleted file mode 100644
index 80135b2..0000000
--- a/tests/integration/base.py
+++ /dev/null
@@ -1,240 +0,0 @@
-import numpy as np
-import sklearn
-from dask_ml.datasets import make_blobs, make_regression
-from distributed import LocalCluster, Client
-from sklearn import clone
-from sklearn.model_selection import RandomizedSearchCV
-
-
-class PredictTests:
- def test_predict(self):
- """
- Test prediction function runs are returns expected shape, even if all classes are not in prediction set.
- :return:
- """
-
- # Predict on all data
- preds = self.mod.predict(self.x)
- self.assertEqual(preds.shape, (self.x.shape[0],))
-
- # Predict on single row
- preds = self.mod.predict(self.x[0, :].reshape(1, -1))
- self.assertEqual(preds.shape, (1,))
-
- def test_predict_proba(self):
- """
- Test prediction function runs are returns expected shape, even if all classes are not in prediction set.
- :return:
- """
- if getattr(self.mod, 'predict_proba', False) is False:
- # No predict_proba for this model type
- pass
- else:
- # Predict on all data
- preds = self.mod.predict_proba(self.x)
- self.assertEqual(preds.shape, (self.x.shape[0], 2))
-
- # Predict on single row
- preds = self.mod.predict_proba(self.x[0, :].reshape(1, -1))
- self.assertEqual(preds.shape, (1, 2))
-
- def test_score(self):
- self.mod.score(self.x, self.y)
-
-
-class PartialFitTests(PredictTests):
- """
- Standard tests to run on supplied model and data.
-
- Inherit this into a class with model/data defined in setUpClass into self.mod, self.x, self.y. Then call the
- setupClass method here to set some helper values.
-
- These tests need to run in order, as self.mod used through tests. Maybe would be better to mock it each time,
- but lazy....
-
- These are run without using Dask, so the subset passing to partial_fit is handled manually.
- """
-
- @classmethod
- def setUpClass(cls) -> None:
- """
- Set helper values from specified model/data. Need to super this from child setUpClass.
- """
- cls.chunk_size = 10
- cls.n_chunks = int(cls.n_samples / cls.chunk_size)
- cls.samples_per_chunk = int(cls.n_samples / cls.n_chunks)
-
- # Cursor will be tracked through data between tests.
- cls.s_idx = 0
- cls.e_idx = cls.samples_per_chunk
-
- def test_first_partial_fit_call(self):
- """
- Call partial_fit for the first time on self.mod.
- :return:
- """
- # Call the first partial fit specifying classes
- self.mod.partial_fit(self.x[self.s_idx:self.e_idx, :],
- self.y[self.s_idx:self.e_idx],
- classes=np.unique(self.y))
-
- def test_next_partial_fit_calls(self):
- """
- Call partial fit on remaining chunks.
-
- Provide classes again on second iteration, otherwise don't.
-
- :return:
- """
- for i in range(1, self.n_chunks):
- self.mod.partial_fit(self.x[self.s_idx:self.e_idx, :],
- self.y[self.s_idx:self.e_idx],
- classes=np.unique(self.y) if i == 2 else None)
-
- self.s_idx = self.e_idx
- self.e_idx = self.s_idx + self.samples_per_chunk
-
- # Set expected number of esitmators in class set up
- # Check it matches with parameters
- expect_ = min((self.mod.n_estimators_per_chunk * self.n_chunks), self.mod.max_n_estimators)
- self.assertEqual(expect_, self.expected_n_estimators)
- # Then check the model matches the validated expectation
- self.assertEqual(len(self.mod.estimators_), self.expected_n_estimators)
-
- def test_result(self):
- """Test performance of model is approximately as expected."""
- pass
-
-
-class FitTests(PredictTests):
- """
- Test direct calls to.fit with dask off, which will use ._sampled_partial_fit() to feed partial_fit.
- """
-
- @classmethod
- def setUpClass(cls):
- """
- Set helper actual model from specified values. Need to super this from child setUpClass.
- :return:
- """
- cls.expected_n_estimators = cls.spf_n_fits * cls.n_estimators_per_sample
-
- cls.n_samples = 1000
- cls.x, cls.y = sklearn.datasets.make_blobs(n_samples=cls.n_samples,
- random_state=0,
- n_features=40,
- centers=2,
- cluster_std=100)
-
- cls.grid = RandomizedSearchCV(clone(cls.mod),
- scoring='roc_auc',
- cv=2,
- n_iter=3,
- verbose=10,
- param_distributions={'spf_sample_prop': [0.1, 0.2, 0.3],
- 'spf_n_fits': [10, 20, 30]},
- n_jobs=-1)
-
- def test_fit__sampled_partial_fit(self):
- """With dask off, call .fit directly."""
- self.mod.fit(self.x, self.y)
-
- def test_n_estimators(self):
- self.assertEqual(self.expected_n_estimators, len(self.mod.estimators_))
-
- def test_grid_search(self):
- """With dask off, try with sklearn GS."""
- self.grid.fit(self.x, self.y)
- self.grid.score(self.x, self.y)
-
-
-class DaskTests:
- """
- Standard tests to run on supplied model and data.
-
- Inherit this into a class with model/data defined in setUpClass into self.mod, self.x, self.y. Then call the
- setupClass method here to set some helper values.
-
- These tests need to run in order, as self.mod used through tests. Maybe would be better to mock it each time,
- but lazy....
-
- These are run without using Dask, so the subset passing to partial_fit is handled manually.
- """
-
- @classmethod
- def setUpClass(cls):
- """
- Set helper values from specified model/data. Need to super this from child setUpClass.
- :return:
- """
- """
- Prepare dask connection once.
- """
-
- try:
- cls.cluster = LocalCluster(processes=True,
- n_workers=4,
- threads_per_worker=2,
- scheduler_port=8586,
- diagnostics_port=8587)
- except (OSError, AttributeError):
- cls.cluster = 'localhost:8586'
-
- cls.client = Client(cls.cluster)
-
- # Set helper valuez
- cls.samples_per_chunk = int(cls.n_samples / cls.n_chunks)
-
- def _prep_data(self, reg=False):
- self.n_samples = int(1e5)
- self.chunk_size = int(1e4)
- self.n_chunks = np.ceil(self.n_samples / self.chunk_size).astype(int)
-
- if reg:
- self.x, self.y = make_regression(n_samples=self.n_samples,
- chunks=self.chunk_size,
- random_state=0,
- n_features=40)
- else:
- self.x, self.y = make_blobs(n_samples=self.n_samples,
- chunks=self.chunk_size,
- random_state=0,
- n_features=40,
- centers=2,
- cluster_std=100)
-
- return self
-
- @classmethod
- def tearDownClass(cls) -> None:
- cls.client.close()
- if type(cls.cluster) != str:
- cls.cluster.close()
-
- def test_fit(self):
- """Test the supplied model by wrapping with dask Incremental and calling .fit."""
- self.mod.fit(self.x, self.y,
- classes=np.unique(self.y).compute())
-
- # Set expected number of estimators in class set up
- # Check it matches with parameters
- expect_ = min((self.mod.estimator.n_estimators_per_chunk * self.n_chunks), self.mod.estimator.max_n_estimators)
- self.assertEqual(expect_, self.expected_n_estimators)
- # Then check the model matches the validated expectation
- self.assertEqual(len(self.mod.estimators_), self.expected_n_estimators)
-
- def test_predict(self):
- """
- Test prediction function runs are returns expected shape, even if all classes are not in prediction set.
- :return:
- """
-
- # Predict on all data
- self.mod.predict(self.x)
-
- # Predict on single row
- self.mod.predict(self.x[0, :].reshape(1, -1))
-
- def test_result(self):
- """Test performance of model is approximately as expected."""
- pass
diff --git a/tests/unit/incremental_trees/__init__.py b/tests/integration/base/__init__.py
similarity index 100%
rename from tests/unit/incremental_trees/__init__.py
rename to tests/integration/base/__init__.py
diff --git a/tests/integration/incremental_trees/test_trees_inconsistent_classes.py b/tests/integration/base/class_consistency_test_base.py
similarity index 65%
rename from tests/integration/incremental_trees/test_trees_inconsistent_classes.py
rename to tests/integration/base/class_consistency_test_base.py
index 86dfd35..d725d16 100644
--- a/tests/integration/incremental_trees/test_trees_inconsistent_classes.py
+++ b/tests/integration/base/class_consistency_test_base.py
@@ -1,13 +1,16 @@
import unittest
+from typing import Union
import numpy as np
import pandas as pd
from incremental_trees.models.classification.streaming_extc import StreamingEXTC
-from incremental_trees.trees import StreamingRFC
+from incremental_trees.models.classification.streaming_rfc import StreamingRFC
-class ClassConsistencyTests:
+class ClassConsistencyTestBase(unittest.TestCase):
+ mod: Union[StreamingEXTC, StreamingRFC]
+
@classmethod
def setUpClass(cls):
data = pd.DataFrame({'a': (1, 2, 3, 4, 5),
@@ -23,8 +26,7 @@ def setUpClass(cls):
def test_none_on_second_call(self):
# Fit with 2 classes
- self.mod.partial_fit(self.x[0:6], self.y[0:6],
- classes=np.array([1, 2, 3]))
+ self.mod.partial_fit(self.x[0:6], self.y[0:6], classes=np.array([1, 2, 3]))
self.mod.predict(self.x[0:6])
self.assertEqual(self.mod.n_classes_, 3)
@@ -39,16 +41,14 @@ def test_none_on_second_call(self):
def test_correct_on_second_call(self):
# Fit with 2 classes
- self.mod.partial_fit(self.x[0:6], self.y[0:6],
- classes=np.array([1, 2, 3]))
+ self.mod.partial_fit(self.x[0:6], self.y[0:6], classes=np.array([1, 2, 3]))
self.mod.predict(self.x[0:6])
self.assertEqual(self.mod.n_classes_, 3)
self.assertListEqual(list(self.mod.classes_), [1, 2, 3])
# Fit with 3 classes
- self.mod.partial_fit(self.x, self.y,
- classes=np.array([1, 2, 3]))
+ self.mod.partial_fit(self.x, self.y, classes=np.array([1, 2, 3]))
self.mod.predict(self.x)
self.assertEqual(self.mod.n_classes_, 3)
@@ -58,16 +58,14 @@ def test_incorrect_on_second_call(self):
"""Incorrect on second call - can happen when dask passes classes."""
# Fit with 3 classes
- self.mod.partial_fit(self.x, self.y,
- classes=np.array([1, 2, 3]))
+ self.mod.partial_fit(self.x, self.y, classes=np.array([1, 2, 3]))
self.mod.predict(self.x)
self.assertEqual(self.mod.n_classes_, 3)
self.assertListEqual(list(self.mod.classes_), [1, 2, 3])
# Fit with 2 classes
- self.mod.partial_fit(self.x[0:6], self.y[0:6],
- classes=np.array([1, 2]))
+ self.mod.partial_fit(self.x[0:6], self.y[0:6], classes=np.array([1, 2]))
self.mod.predict(self.x[0:6])
self.assertEqual(self.mod.n_classes_, 3)
@@ -81,15 +79,3 @@ def test_incorrect_on_second_call(self):
self.assertListEqual(list(self.mod.classes_), [1, 2, 3])
-class TestInconsistentClassesRFC(ClassConsistencyTests, unittest.TestCase):
- def setUp(self):
- self.mod = StreamingRFC(n_estimators_per_chunk=1,
- max_n_estimators=np.inf,
- verbose=2)
-
-
-class TestInconsistentClassesEXT(ClassConsistencyTests, unittest.TestCase):
- def setUp(self):
- self.mod = StreamingEXTC(n_estimators_per_chunk=1,
- max_n_estimators=np.inf,
- verbose=2)
diff --git a/tests/integration/base/dask_test_base.py b/tests/integration/base/dask_test_base.py
new file mode 100644
index 0000000..b30e4ed
--- /dev/null
+++ b/tests/integration/base/dask_test_base.py
@@ -0,0 +1,109 @@
+import unittest
+from typing import Union
+
+import numpy as np
+from dask_ml.datasets import make_blobs, make_regression
+from dask_ml.wrappers import Incremental
+from distributed import LocalCluster, Client
+
+
+class DaskTestBase(unittest.TestCase):
+    """
+    Shared fit/predict test for a streaming model wrapped in dask_ml's Incremental.
+
+    Inherit this into a class whose setUpClass calls cls._prep_data(cls, reg=...) to build the data, sets cls.mod to
+    the wrapped model and cls.expected_n_estimators to the expected tree count, then calls super().setUpClass() to
+    prepare the Dask client and the remaining helper values.
+    """
+
+    client: Client
+    # Either the cluster started in setUpClass or, as a fallback, the address of a scheduler to connect to.
+    cluster: Union[str, LocalCluster]
+    n_samples: int
+    chunk_size: int
+    n_chunks: int
+    samples_per_chunk: int
+    mod: Incremental
+    reg: bool
+    expected_n_estimators: int
+    # NOTE(review): _prep_data builds x and y with dask_ml.datasets, so these are presumably dask arrays - confirm.
+    x: np.ndarray
+    y: np.ndarray
+
+    @classmethod
+    def setUpClass(cls):
+        """
+        Prepare the Dask connection once and set helper values from the specified data.
+
+        Need to super this from child setUpClass, after n_samples and n_chunks have been set (see _prep_data).
+        """
+        try:
+            cls.cluster = LocalCluster(
+                processes=True,
+                n_workers=4,
+                threads_per_worker=2,
+                scheduler_port=8586,
+                diagnostics_port=8587
+            )
+        except (OSError, AttributeError):
+            # Starting a cluster on the fixed ports failed, so connect to that scheduler address instead.
+            # NOTE(review): presumably this means a cluster from an earlier test class is still up - confirm.
+            cls.cluster = 'localhost:8586'
+
+        cls.client = Client(cls.cluster)
+        cls.samples_per_chunk = int(cls.n_samples / cls.n_chunks)
+
+    def _prep_data(self, reg: bool):
+        """
+        Generate chunked example data and store it, along with its size helpers, on self.
+
+        Child classes call this from setUpClass as cls._prep_data(cls, reg=...), so self is the class object there.
+
+        :param reg: If True make regression data, otherwise make 2-centre blobs for classification.
+        :return: self.
+        """
+        self.n_samples = int(1e5)
+        self.chunk_size = int(1e4)
+        self.n_chunks = np.ceil(self.n_samples / self.chunk_size).astype(int)
+
+        if reg:
+            self.x, self.y = make_regression(
+                n_samples=self.n_samples,
+                chunks=self.chunk_size,
+                random_state=0,
+                n_features=40)
+        else:
+            self.x, self.y = make_blobs(
+                n_samples=self.n_samples,
+                chunks=self.chunk_size,
+                random_state=0,
+                n_features=40,
+                centers=2,
+                cluster_std=100)
+
+        return self
+
+    @classmethod
+    def tearDownClass(cls) -> None:
+        """Close the client, and also the cluster when setUpClass started one rather than storing an address."""
+        cls.client.close()
+        if not isinstance(cls.cluster, str):
+            cls.cluster.close()
+
+    def test_fit_predict(self):
+        """Test the supplied model by wrapping with dask Incremental and calling .fit."""
+
+        # Act
+        self.mod.fit(self.x, self.y, classes=np.unique(self.y).compute())
+        preds = self.mod.predict(self.x)
+        single_pred = self.mod.predict(self.x[0, :].reshape(1, -1))
+
+        # Assert
+        # Set expected number of estimators in class set up
+        # Check it matches with parameters
+        expect_ = min((self.mod.estimator.n_estimators_per_chunk * self.n_chunks), self.mod.estimator.max_n_estimators)
+        self.assertEqual(expect_, self.expected_n_estimators)
+        # Then check the model matches the validated expectation
+        self.assertEqual(len(self.mod.estimators_), self.expected_n_estimators)
+        self.assertEqual(self.x.shape[0], preds.shape[0])
+        self.assertEqual(1, len(single_pred))
diff --git a/tests/integration/base/fit_test_base.py b/tests/integration/base/fit_test_base.py
new file mode 100644
index 0000000..8fe42f8
--- /dev/null
+++ b/tests/integration/base/fit_test_base.py
@@ -0,0 +1,68 @@
+from typing import Union
+
+import sklearn.datasets
+from sklearn import clone
+from sklearn.model_selection import RandomizedSearchCV
+
+from incremental_trees.models.classification.streaming_extc import StreamingEXTC
+from incremental_trees.models.classification.streaming_rfc import StreamingRFC
+from incremental_trees.models.regression.streaming_extr import StreamingEXTR
+from incremental_trees.models.regression.streaming_rfr import StreamingRFR
+from tests.integration.base.predict_test_base import PredictTestBase
+
+
+class FitTestBase(PredictTestBase):
+    """
+    Test direct calls to .fit with dask off, which will use ._sampled_partial_fit() to feed partial_fit.
+
+    Child classes set spf_n_fits, n_estimators_per_sample and mod in their setUpClass, then super this setUpClass.
+
+    These tests need to run in order, as test_n_estimators and the inherited prediction tests use self.mod after it
+    has been fitted by test_fit__sampled_partial_fit.
+    """
+
+    spf_n_fits: int
+    n_samples: int
+    n_estimators_per_sample: int
+    mod: Union[StreamingEXTC, StreamingEXTR, StreamingRFC, StreamingRFR]
+    dask_feeding: bool = False
+    spf_sample_prop: float = 0.1
+
+    @classmethod
+    def setUpClass(cls):
+        """
+        Set the expected estimator count, example data and grid search from the specified values.
+
+        Need to super this from child setUpClass, once spf_n_fits, n_estimators_per_sample and mod are set.
+        """
+        cls.expected_n_estimators = cls.spf_n_fits * cls.n_estimators_per_sample
+
+        cls.n_samples = 1000
+        cls.x, cls.y = sklearn.datasets.make_blobs(n_samples=cls.n_samples,
+                                                   random_state=0,
+                                                   n_features=40,
+                                                   centers=2,
+                                                   cluster_std=100)
+
+        # The search works on a clone, so fitting the grid leaves cls.mod itself untouched.
+        cls.grid = RandomizedSearchCV(clone(cls.mod),
+                                      scoring='roc_auc',
+                                      cv=2,
+                                      n_iter=3,
+                                      verbose=10,
+                                      param_distributions={'spf_sample_prop': [0.1, 0.2, 0.3],
+                                                           'spf_n_fits': [10, 20, 30]},
+                                      n_jobs=-1)
+
+    def test_fit__sampled_partial_fit(self):
+        """With dask off, call .fit directly."""
+        self.mod.fit(self.x, self.y)
+
+    def test_n_estimators(self):
+        """Check the fitted model holds spf_n_fits * n_estimators_per_sample estimators."""
+        self.assertEqual(self.expected_n_estimators, len(self.mod.estimators_))
+
+    def test_grid_search(self):
+        """With dask off, try with sklearn GS."""
+        self.grid.fit(self.x, self.y)
+        self.grid.score(self.x, self.y)
diff --git a/tests/integration/base/partial_fit_test_base.py b/tests/integration/base/partial_fit_test_base.py
new file mode 100644
index 0000000..5267e7b
--- /dev/null
+++ b/tests/integration/base/partial_fit_test_base.py
@@ -0,0 +1,81 @@
+from typing import Union
+
+import numpy as np
+
+from incremental_trees.models.classification.streaming_extc import StreamingEXTC
+from incremental_trees.models.classification.streaming_rfc import StreamingRFC
+from incremental_trees.models.regression.streaming_extr import StreamingEXTR
+from incremental_trees.models.regression.streaming_rfr import StreamingRFR
+from tests.integration.base.predict_test_base import PredictTestBase
+
+
+class PartialFitTestBase(PredictTestBase):
+    """
+    Standard tests to run on supplied model and data.
+
+    Inherit this into a class with model/data defined in setUpClass into self.mod, self.x, self.y. Then call the
+    setUpClass method here to set some helper values.
+
+    These tests need to run in order, as self.mod is used through the tests. Maybe would be better to mock it each
+    time, but lazy....
+
+    These are run without using Dask, so the subset passing to partial_fit is handled manually.
+    """
+
+    mod: Union[StreamingEXTC, StreamingEXTR, StreamingRFC, StreamingRFR]
+    x: np.ndarray
+    y: np.ndarray
+    n_samples: int
+    chunk_size: int
+    n_chunks: int
+    samples_per_chunk: int
+    expected_n_estimators: int
+
+    @classmethod
+    def setUpClass(cls) -> None:
+        """
+        Set helper values from specified model/data. Need to super this from child setUpClass.
+        """
+        cls.chunk_size = 10
+        cls.n_chunks = int(cls.n_samples / cls.chunk_size)
+        cls.samples_per_chunk = int(cls.n_samples / cls.n_chunks)
+
+        # Cursor into the data, starting on the first chunk. Tests that move it do so on their own instance only.
+        cls.s_idx = 0
+        cls.e_idx = cls.samples_per_chunk
+
+    def test_first_partial_fit_call(self):
+        """
+        Call partial_fit for the first time on self.mod, using the first chunk.
+        :return:
+        """
+        # Call the first partial fit specifying classes
+        self.mod.partial_fit(self.x[self.s_idx:self.e_idx, :],
+                             self.y[self.s_idx:self.e_idx],
+                             classes=np.unique(self.y))
+
+    def test_next_partial_fit_calls(self):
+        """
+        Call partial fit on remaining chunks.
+
+        Provide classes again on second iteration, otherwise don't.
+
+        :return:
+        """
+        for i in range(1, self.n_chunks):
+            # Advance the cursor before fitting: it starts on the first chunk, which the first partial_fit call
+            # has already used, so fitting before moving would feed that chunk twice and never reach the last one.
+            self.s_idx = self.e_idx
+            self.e_idx = self.s_idx + self.samples_per_chunk
+
+            self.mod.partial_fit(self.x[self.s_idx:self.e_idx, :],
+                                 self.y[self.s_idx:self.e_idx],
+                                 classes=np.unique(self.y) if i == 2 else None)
+
+        # Set expected number of estimators in class set up
+        # Check it matches with parameters
+        expect_ = min((self.mod.n_estimators_per_chunk * self.n_chunks), self.mod.max_n_estimators)
+        self.assertEqual(expect_, self.expected_n_estimators)
+        # Then check the model matches the validated expectation
+        self.assertEqual(len(self.mod.estimators_), self.expected_n_estimators)
diff --git a/tests/integration/base/predict_test_base.py b/tests/integration/base/predict_test_base.py
new file mode 100644
index 0000000..fba88f1
--- /dev/null
+++ b/tests/integration/base/predict_test_base.py
@@ -0,0 +1,60 @@
+import unittest
+from typing import Union
+
+import numpy as np
+
+from incremental_trees.models.classification.streaming_extc import StreamingEXTC
+from incremental_trees.models.classification.streaming_rfc import StreamingRFC
+from incremental_trees.models.regression.streaming_extr import StreamingEXTR
+from incremental_trees.models.regression.streaming_rfr import StreamingRFR
+
+
+class PredictTestBase(unittest.TestCase):
+    """
+    Prediction and scoring checks shared by the test bases that inherit from this class.
+
+    The inheriting class provides x, y and mod.
+    """
+
+    # NOTE(review): mod is presumably fitted by an earlier-running test of the inheriting class - confirm.
+    x: np.ndarray
+    y: np.ndarray
+    mod: Union[StreamingEXTC, StreamingEXTR, StreamingRFC, StreamingRFR]
+
+    def test_predict(self):
+        """
+        Test prediction function runs and returns expected shape, even if all classes are not in prediction set.
+        :return:
+        """
+
+        # Predict on all data
+        preds = self.mod.predict(self.x)
+        self.assertEqual(preds.shape, (self.x.shape[0],))
+
+        # Predict on single row
+        preds = self.mod.predict(self.x[0, :].reshape(1, -1))
+        self.assertEqual(preds.shape, (1,))
+
+    def test_predict_proba(self):
+        """
+        Test predict_proba runs and returns expected shape, even if all classes are not in prediction set.
+
+        Does nothing (and so passes) when the model has no predict_proba attribute.
+        """
+        if not hasattr(self.mod, 'predict_proba'):
+            # No predict_proba for this model type
+            return
+
+        # Predict on all data
+        # NOTE(review): the 2 columns assume two-class data (centers=2 in the inheriting fixtures) - confirm.
+        preds = self.mod.predict_proba(self.x)
+        self.assertEqual(preds.shape, (self.x.shape[0], 2))
+
+        # Predict on single row
+        preds = self.mod.predict_proba(self.x[0, :].reshape(1, -1))
+        self.assertEqual(preds.shape, (1, 2))
+
+    def test_score(self):
+        """Test score runs and returns a float."""
+        score = self.mod.score(self.x, self.y)
+        self.assertIsInstance(score, float)
diff --git a/tests/integration/classification/__init__.py b/tests/integration/classification/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/integration/classification/test_streaming_extc.py b/tests/integration/classification/test_streaming_extc.py
new file mode 100644
index 0000000..f86e516
--- /dev/null
+++ b/tests/integration/classification/test_streaming_extc.py
@@ -0,0 +1,211 @@
+import numpy as np
+from dask_ml.wrappers import Incremental
+from sklearn.datasets import make_blobs
+
+from incremental_trees.models.classification.streaming_extc import StreamingEXTC
+from tests.integration.base.class_consistency_test_base import ClassConsistencyTestBase
+from tests.integration.base.dask_test_base import DaskTestBase
+from tests.integration.base.fit_test_base import FitTestBase
+from tests.integration.base.partial_fit_test_base import PartialFitTestBase
+
+
+class TestStreamingEXTCWithPartialFitsUnlimitedEstimators(PartialFitTestBase):
+ """
+ Test SEXT with single estimator per chunk with "random forest style" max features. ie, subset.
+
+ No limit on the total number of trees.
+ """
+
+ @classmethod
+ def setUpClass(cls):
+ """Set up model to test."""
+ cls.n_samples = 1000
+ cls.x, cls.y = make_blobs(n_samples=int(2e4), random_state=0, n_features=40, centers=2, cluster_std=100)
+ cls.mod = StreamingEXTC(n_estimators_per_chunk=1, max_n_estimators=np.inf)
+ cls.expected_n_estimators = 100
+
+ super().setUpClass()
+
+
+class TestStreamingEXTCWithPartialFitsLimitedEstimators(PartialFitTestBase):
+ """
+ Test SEXT with single estimator per chunk with "random forest style" max features. ie, subset.
+
+ Total models limited to 39.
+ """
+
+ @classmethod
+ def setUpClass(cls):
+ """Set up model to test."""
+ cls.n_samples = 1000
+ cls.x, cls.y = make_blobs(n_samples=int(2e4), random_state=0, n_features=40, centers=2, cluster_std=100)
+ cls.mod = StreamingEXTC(n_estimators_per_chunk=1, max_n_estimators=39)
+ cls.expected_n_estimators = 39
+
+ super().setUpClass()
+
+
+class TestStreamingEXTCWithPartialFitsMultipleEstimatorsPerChunk(PartialFitTestBase):
+ """
+ Test SEXT with multiple estimators per chunk with "random forest style" max features. ie, subset.
+
+ No limit on total models, 3 estimators per row subset (each with different feature subset)
+ """
+
+ @classmethod
+ def setUpClass(cls):
+ """Set up model to test."""
+ cls.n_samples = 1000
+ cls.x, cls.y = make_blobs(n_samples=int(2e4), random_state=0, n_features=40, centers=2, cluster_std=100)
+ cls.mod = StreamingEXTC(n_estimators_per_chunk=3, n_jobs=-1, max_n_estimators=np.inf)
+ cls.expected_n_estimators = 300
+
+ super().setUpClass()
+
+
+class TestStreamingEXTCWithPartialFitsAllFeatures(PartialFitTestBase):
+ """
+ Test SEXT with single estimator per chunk with "decision tree style" max features. ie, all available to each tree.
+
+ No limit on total models, 1 estimator per row subset.
+ """
+
+ @classmethod
+ def setUpClass(cls):
+ """Set up model to test."""
+ cls.n_samples = 1000
+ cls.x, cls.y = make_blobs(n_samples=int(2e4), random_state=0, n_features=40, centers=2, cluster_std=100)
+ cls.mod = StreamingEXTC(n_estimators_per_chunk=1, max_features=cls.x.shape[1], max_n_estimators=np.inf)
+ cls.expected_n_estimators = 100
+
+ super().setUpClass()
+
+
+class TestStreamingEXTCWithPartialFitsMultipleEstimatorsPerChunkAllFeatures(PartialFitTestBase):
+ """
+ Test SEXT with multiple estimators per chunk with "decision tree style" max features. ie, all available to each tree.
+
+ No limit on total models, 3 estimators per row subset.
+ """
+
+ @classmethod
+ def setUpClass(cls):
+ cls.n_samples = 1000
+ cls.x, cls.y = make_blobs(n_samples=int(2e4), random_state=0, n_features=40, centers=2, cluster_std=100)
+
+ cls.mod = StreamingEXTC(
+ n_estimators_per_chunk=3,
+ n_jobs=-1,
+ max_features=cls.x.shape[1],
+ max_n_estimators=np.inf
+ )
+ cls.expected_n_estimators = 300
+
+ super().setUpClass()
+
+
+class TestStreamingEXTCWithFitSingleEstimatorPerChunk(FitTestBase):
+ @classmethod
+ def setUpClass(cls):
+ cls.spf_n_fits = 10
+ cls.n_estimators_per_sample = 1
+ cls.mod = StreamingEXTC(
+ verbose=1,
+ n_estimators_per_chunk=cls.n_estimators_per_sample,
+ max_n_estimators=np.inf,
+ dask_feeding=cls.dask_feeding,
+ spf_sample_prop=cls.spf_sample_prop,
+ spf_n_fits=cls.spf_n_fits
+ )
+
+ super().setUpClass()
+
+
+class TestStreamingEXTCWithFitMultipleEstimatorsPerChunk(FitTestBase):
+ @classmethod
+ def setUpClass(cls):
+ """Set up model to test."""
+ cls.spf_n_fits = 10
+ cls.n_estimators_per_sample = 10
+ cls.mod = StreamingEXTC(
+ verbose=1,
+ n_estimators_per_chunk=cls.n_estimators_per_sample,
+ max_n_estimators=np.inf,
+ dask_feeding=cls.dask_feeding,
+ spf_sample_prop=cls.spf_sample_prop,
+ spf_n_fits=cls.spf_n_fits
+ )
+
+ super().setUpClass()
+
+
+class TestStreamingEXTCWithFitAdditionalSteps(FitTestBase):
+ @classmethod
+ def setUpClass(cls):
+ cls.spf_n_fits = 20
+ cls.n_estimators_per_sample = 6
+ cls.mod = StreamingEXTC(
+ verbose=1,
+ n_estimators_per_chunk=cls.n_estimators_per_sample,
+ max_n_estimators=np.inf,
+ dask_feeding=cls.dask_feeding,
+ spf_sample_prop=cls.spf_sample_prop,
+ spf_n_fits=cls.spf_n_fits
+ )
+
+ super().setUpClass()
+
+
+class TestDaskEXTCWithDask(DaskTestBase):
+ @classmethod
+ def setUpClass(cls):
+ cls._prep_data(cls, reg=False)
+ cls.mod = Incremental(StreamingEXTC(n_estimators_per_chunk=1, max_n_estimators=39, verbose=1))
+ cls.expected_n_estimators = 10
+
+ super().setUpClass()
+
+
+class TestDaskEXTCWithDaskMultipleEstimatorsPerChunk(DaskTestBase):
+ @classmethod
+ def setUpClass(cls):
+ cls._prep_data(cls, reg=False)
+ cls.mod = Incremental(StreamingEXTC(n_estimators_per_chunk=2, n_jobs=-1, max_n_estimators=np.inf, verbose=1))
+ cls.expected_n_estimators = 20
+
+ super().setUpClass()
+
+
+class TestDaskEXTCWithDaskManyEstimatorsPerChunk(DaskTestBase):
+ @classmethod
+ def setUpClass(cls):
+ cls._prep_data(cls, reg=False)
+ cls.mod = Incremental(StreamingEXTC(n_estimators_per_chunk=20, n_jobs=-1, max_n_estimators=np.inf, verbose=1))
+ cls.expected_n_estimators = 200
+
+ super().setUpClass()
+
+
+class TestDaskEXTCWithDaskAlLFeatures(DaskTestBase):
+ @classmethod
+ def setUpClass(cls):
+ cls._prep_data(cls, reg=False)
+ cls.mod = Incremental(
+ StreamingEXTC(
+ n_estimators_per_chunk=1, n_jobs=-1,
+ max_n_estimators=np.inf,
+ max_features=cls.x.shape[1],
+ verbose=1
+ )
+ )
+ cls.expected_n_estimators = 10
+
+ super().setUpClass()
+
+
+class TestInconsistentClassesEXTC(ClassConsistencyTestBase):
+ def setUp(self):
+ self.mod = StreamingEXTC(n_estimators_per_chunk=1, max_n_estimators=np.inf, verbose=2)
+
+
+del FitTestBase, PartialFitTestBase, DaskTestBase, ClassConsistencyTestBase
diff --git a/tests/integration/classification/test_streaming_rfc.py b/tests/integration/classification/test_streaming_rfc.py
new file mode 100644
index 0000000..0421de1
--- /dev/null
+++ b/tests/integration/classification/test_streaming_rfc.py
@@ -0,0 +1,210 @@
+import numpy as np
+from dask_ml.wrappers import Incremental
+from sklearn.datasets import make_blobs
+
+from incremental_trees.models.classification.streaming_rfc import StreamingRFC
+from tests.integration.base.class_consistency_test_base import ClassConsistencyTestBase
+from tests.integration.base.dask_test_base import DaskTestBase
+from tests.integration.base.fit_test_base import FitTestBase
+from tests.integration.base.partial_fit_test_base import PartialFitTestBase
+
+
+class TestStreamingRFCWithPartialFitsUnlimitedEstimators(PartialFitTestBase):
+ """
+ Test SRFC with single estimator per chunk with "random forest style" max features. ie, subset.
+
+ No limit on the total number of trees.
+ """
+
+ @classmethod
+ def setUpClass(cls):
+ """Set up model to test."""
+ cls.n_samples = 1000
+ cls.x, cls.y = make_blobs(n_samples=int(2e4), random_state=0, n_features=40, centers=2, cluster_std=100)
+ cls.mod = StreamingRFC(verbose=1, n_estimators_per_chunk=1, max_n_estimators=np.inf)
+ cls.expected_n_estimators = 100
+
+ super().setUpClass()
+
+
+class TestStreamingRFCWithPartialFitsLimitedEstimators(PartialFitTestBase):
+ """
+ Test SRFC with single estimator per chunk with "random forest style" max features. ie, subset.
+
+ Total models limited to 39.
+ """
+
+ @classmethod
+ def setUpClass(cls):
+ """Set up model to test."""
+ cls.n_samples = 1000
+ cls.x, cls.y = make_blobs(n_samples=int(2e4), random_state=0, n_features=40, centers=2, cluster_std=100)
+ cls.mod = StreamingRFC(n_estimators_per_chunk=1, max_n_estimators=39)
+ cls.expected_n_estimators = 39
+
+ super().setUpClass()
+
+
+class TestStreamingRFCWithPartialFitsMultipleEstimatorsPerChunk(PartialFitTestBase):
+ """
+ Test SRFC with multiple estimators per chunk with "random forest style" max features. ie, subset.
+
+ No limit on total models, 3 estimators per row subset (each with different feature subset)
+ """
+
+ @classmethod
+ def setUpClass(cls):
+ """Set up model to test."""
+ cls.n_samples = 1000
+ cls.x, cls.y = make_blobs(n_samples=int(2e4), random_state=0, n_features=40, centers=2, cluster_std=100)
+ cls.mod = StreamingRFC(n_estimators_per_chunk=3, n_jobs=-1, max_n_estimators=np.inf)
+ cls.expected_n_estimators = 300
+
+ super().setUpClass()
+
+
+class TestStreamingRFCWithPartialFitsAllFeatures(PartialFitTestBase):
+ """
+ Test SRFC with single estimator per chunk with "decision tree style" max features. ie, all available to each tree.
+
+ No limit on total models, 1 estimator per row subset.
+ """
+
+ @classmethod
+ def setUpClass(cls):
+ """Set up model to test."""
+ cls.n_samples = 1000
+ cls.x, cls.y = make_blobs(n_samples=int(2e4), random_state=0, n_features=40, centers=2, cluster_std=100)
+ cls.mod = StreamingRFC(n_estimators_per_chunk=1, max_features=cls.x.shape[1], max_n_estimators=np.inf)
+ cls.expected_n_estimators = 100
+
+ super().setUpClass()
+
+
+class TestStreamingRFCWithPartialFitsMultipleEstimatorsPerChunkAllFeatures(PartialFitTestBase):
+ """
+ Test SRFC with multiple estimators per chunk with "decision tree style" max features. ie, all available to each tree.
+
+ No limit on total models, 3 estimators per row subset.
+ """
+
+ @classmethod
+ def setUpClass(cls):
+ """Set up model to test."""
+ cls.n_samples = 1000
+ cls.x, cls.y = make_blobs(n_samples=int(2e4), random_state=0, n_features=40, centers=2, cluster_std=100)
+ cls.mod = StreamingRFC(
+ n_estimators_per_chunk=3,
+ n_jobs=-1,
+ max_features=cls.x.shape[1],
+ max_n_estimators=np.inf
+ )
+ cls.expected_n_estimators = 300
+
+ super().setUpClass()
+
+
+class TestStreamingEXTCWithFitSingleEstimatorPerChunk(FitTestBase):
+ @classmethod
+ def setUpClass(cls):
+ cls.spf_n_fits = 10
+ cls.n_estimators_per_sample = 1
+ cls.mod = StreamingRFC(
+ verbose=1,
+ n_estimators_per_chunk=cls.n_estimators_per_sample,
+ max_n_estimators=np.inf,
+ dask_feeding=cls.dask_feeding,
+ spf_sample_prop=cls.spf_sample_prop,
+ spf_n_fits=cls.spf_n_fits
+ )
+
+ super().setUpClass()
+
+
+class TestStreamingEXTCWithFitMultipleEstimatorsPerChunk(FitTestBase):
+ @classmethod
+ def setUpClass(cls):
+ cls.spf_n_fits = 10
+ cls.n_estimators_per_sample = 10
+ cls.mod = StreamingRFC(
+ verbose=1,
+ n_estimators_per_chunk=cls.n_estimators_per_sample,
+ max_n_estimators=np.inf,
+ dask_feeding=cls.dask_feeding,
+ spf_sample_prop=cls.spf_sample_prop,
+ spf_n_fits=cls.spf_n_fits
+ )
+
+ super().setUpClass()
+
+
+class TestStreamingEXTCWithFitAdditionalSteps(FitTestBase):
+ @classmethod
+ def setUpClass(cls):
+ cls.spf_n_fits = 20
+ cls.n_estimators_per_sample = 6
+ cls.mod = StreamingRFC(
+ verbose=1,
+ n_estimators_per_chunk=cls.n_estimators_per_sample,
+ max_n_estimators=np.inf,
+ dask_feeding=cls.dask_feeding,
+ spf_sample_prop=cls.spf_sample_prop,
+ spf_n_fits=cls.spf_n_fits
+ )
+
+ super().setUpClass()
+
+
+class TestDaskRFCWithDask(DaskTestBase):
+ @classmethod
+ def setUpClass(cls):
+ cls._prep_data(cls, reg=False)
+ cls.mod = Incremental(StreamingRFC(n_estimators_per_chunk=1, max_n_estimators=39, verbose=1))
+ cls.expected_n_estimators = 10
+
+ super().setUpClass()
+
+
+class TestDaskRFCWithDaskMultipleEstimatorsPerChunk(DaskTestBase):
+ @classmethod
+ def setUpClass(cls):
+ cls._prep_data(cls, reg=False)
+ cls.mod = Incremental(StreamingRFC(n_estimators_per_chunk=2, n_jobs=-1, max_n_estimators=np.inf, verbose=1))
+ cls.expected_n_estimators = 20
+
+ super().setUpClass()
+
+
+class TestDaskRFCWithDaskManyEstimatorsPerChunk(DaskTestBase):
+ @classmethod
+ def setUpClass(cls):
+ cls._prep_data(cls, reg=False)
+ cls.mod = Incremental(StreamingRFC(n_estimators_per_chunk=20, n_jobs=-1, max_n_estimators=np.inf, verbose=1))
+ cls.expected_n_estimators = 200
+
+ super().setUpClass()
+
+
+class TestDaskRFCWithDaskAlLFeatures(DaskTestBase):
+ @classmethod
+ def setUpClass(cls):
+ cls._prep_data(cls, reg=False)
+ cls.mod = Incremental(
+ StreamingRFC(
+ n_estimators_per_chunk=1, n_jobs=-1,
+ max_n_estimators=np.inf,
+ max_features=cls.x.shape[1],
+ verbose=1
+ )
+ )
+ cls.expected_n_estimators = 10
+
+ super().setUpClass()
+
+
+class TestInconsistentClassesRFC(ClassConsistencyTestBase):
+ def setUp(self):
+ self.mod = StreamingRFC(n_estimators_per_chunk=1, max_n_estimators=np.inf, verbose=2)
+
+
+del FitTestBase, PartialFitTestBase, DaskTestBase, ClassConsistencyTestBase
diff --git a/tests/integration/incremental_trees/test_trees.py b/tests/integration/incremental_trees/test_trees.py
deleted file mode 100644
index ad7c2d9..0000000
--- a/tests/integration/incremental_trees/test_trees.py
+++ /dev/null
@@ -1,897 +0,0 @@
-import unittest
-
-import numpy as np
-import sklearn
-import sklearn.datasets
-
-from incremental_trees.models.classification.streaming_extc import StreamingEXTC
-from incremental_trees.models.regression.streaming_extr import StreamingEXTR
-from incremental_trees.models.regression.streaming_rfr import StreamingRFR
-from incremental_trees.trees import StreamingRFC
-from tests.integration.base import PartialFitTests, FitTests
-
-
-class TestStreamingRFC1(PartialFitTests, unittest.TestCase):
- """
- Test SRFC with single estimator per chunk with "random forest style" max features. ie, subset.
-
- No limit on the total number of trees.
- """
-
- @classmethod
- def setUpClass(cls):
- """Set up model to test."""
- cls.n_samples = 1000
- cls.x, cls.y = sklearn.datasets.make_blobs(n_samples=int(2e4),
- random_state=0,
- n_features=40,
- centers=2,
- cluster_std=100)
-
- cls.mod = StreamingRFC(verbose=1,
- n_estimators_per_chunk=1,
- max_n_estimators=np.inf)
-
- # Set expected number of estimators
- cls.expected_n_estimators = 100
-
- # Set helper values
- super().setUpClass()
-
-
-class TestStreamingRFC2(PartialFitTests, unittest.TestCase):
- """
- Test SRFC with single estimator per chunk with "random forest style" max features. ie, subset.
-
- Total models limited to 39.
- """
-
- @classmethod
- def setUpClass(cls):
- """Set up model to test."""
- cls.n_samples = 1000
- cls.x, cls.y = sklearn.datasets.make_blobs(n_samples=int(2e4),
- random_state=0,
- n_features=40,
- centers=2,
- cluster_std=100)
-
- cls.mod = StreamingRFC(n_estimators_per_chunk=1,
- max_n_estimators=39)
-
- # Set expected number of estimators
- cls.expected_n_estimators = 39
-
- # Set helper values
- super().setUpClass()
-
-
-class TestStreamingRFC3(PartialFitTests, unittest.TestCase):
- """
- Test SRFC with multiple estimators per chunk with "random forest style" max features. ie, subset.
-
- No limit on total models, 3 estimators per row subset (each with different feature subset)
- """
-
- @classmethod
- def setUpClass(cls):
- """Set up model to test."""
- cls.n_samples = 1000
- cls.x, cls.y = sklearn.datasets.make_blobs(n_samples=int(2e4),
- random_state=0,
- n_features=40,
- centers=2,
- cluster_std=100)
-
- cls.mod = StreamingRFC(n_estimators_per_chunk=3,
- n_jobs=-1,
- max_n_estimators=np.inf)
-
- # Set expected number of estimators
- cls.expected_n_estimators = 300
-
- # Set helper values
- super().setUpClass()
-
-
-class TestStreamingRFC4(PartialFitTests, unittest.TestCase):
- """
- Test SRFC with single estimator per chunk with "decision tree style" max features. ie, all available to each tree.
-
- No limit on total models, 1 estimators per row subset.
- """
-
- @classmethod
- def setUpClass(cls):
- """Set up model to test."""
- cls.n_samples = 1000
- cls.x, cls.y = sklearn.datasets.make_blobs(n_samples=int(2e4),
- random_state=0,
- n_features=40,
- centers=2,
- cluster_std=100)
-
- cls.mod = StreamingRFC(n_estimators_per_chunk=1,
- max_features=cls.x.shape[1],
- max_n_estimators=np.inf)
-
- # Set expected number of estimators
- cls.expected_n_estimators = 100
-
- # Set helper values
- super().setUpClass()
-
-
-class TestStreamingRFC5(PartialFitTests, unittest.TestCase):
- """
- Test SRFC with single estimator per chunk with "decision tree style" max features. ie, all available to each tree.
-
- No limit on total models, 3 estimators per row subset.
- """
-
- @classmethod
- def setUpClass(cls):
- """Set up model to test."""
- cls.n_samples = 1000
- cls.x, cls.y = sklearn.datasets.make_blobs(n_samples=int(2e4),
- random_state=0,
- n_features=40,
- centers=2,
- cluster_std=100)
-
- cls.mod = StreamingRFC(n_estimators_per_chunk=3,
- n_jobs=-1,
- max_features=cls.x.shape[1],
- max_n_estimators=np.inf)
-
- # Set expected number of estimators
- cls.expected_n_estimators = 300
-
- # Set helper values
- super().setUpClass()
-
-
-class TestStreamingRFC6(PartialFitTests, unittest.TestCase):
- """
- Test SRFC with single estimator per chunk with "decision tree style" max features. ie, all available to each tree.
-
- No limit on total models, 3 estimators per row subset.
- """
-
- @classmethod
- def setUpClass(cls):
- """Set up model to test."""
- cls.n_samples = 1000
- cls.x, cls.y = sklearn.datasets.make_blobs(n_samples=int(2e4),
- random_state=0,
- n_features=40,
- centers=2,
- cluster_std=100)
-
- cls.mod = StreamingRFC(n_estimators_per_chunk=3,
- n_jobs=-1,
- max_features=cls.x.shape[1],
- max_n_estimators=np.inf)
-
- # Set expected number of estimators
- cls.expected_n_estimators = 300
-
- # Set helper values
- super().setUpClass()
-
-
-class TestStreamingRFC7(FitTests, unittest.TestCase):
- @classmethod
- def setUpClass(cls):
- """Set up model to test."""
-
- cls.spf_n_fits = 10
- cls.spf_sample_prop = 0.1
- cls.dask_feeding = False
- cls.n_estimators_per_sample = 1
-
- cls.mod = StreamingRFC(verbose=1,
- n_estimators_per_chunk=cls.n_estimators_per_sample,
- max_n_estimators=np.inf,
- dask_feeding=cls.dask_feeding,
- spf_sample_prop=cls.spf_sample_prop,
- spf_n_fits=cls.spf_n_fits)
-
- super().setUpClass()
-
-
-class TestStreamingRFC8(FitTests, unittest.TestCase):
- @classmethod
- def setUpClass(cls):
- """Set up model to test."""
- cls.spf_n_fits = 10
- cls.spf_sample_prop = 0.1
- cls.dask_feeding = False
- cls.n_estimators_per_sample = 10
-
- cls.mod = StreamingRFC(verbose=1,
- n_estimators_per_chunk=cls.n_estimators_per_sample,
- max_n_estimators=np.inf,
- dask_feeding=cls.dask_feeding,
- spf_sample_prop=cls.spf_sample_prop,
- spf_n_fits=cls.spf_n_fits)
-
- super().setUpClass()
-
-
-class TestStreamingRFC9(FitTests, unittest.TestCase):
- @classmethod
- def setUpClass(cls):
- """Set up model to test."""
-
- cls.spf_n_fits = 20
- cls.spf_sample_prop = 0.1
- cls.dask_feeding = False
- cls.n_estimators_per_sample = 6
-
- cls.mod = StreamingRFC(verbose=1,
- n_estimators_per_chunk=cls.n_estimators_per_sample,
- max_n_estimators=np.inf,
- dask_feeding=cls.dask_feeding,
- spf_sample_prop=cls.spf_sample_prop,
- spf_n_fits=cls.spf_n_fits)
-
- super().setUpClass()
-
-
-class TestStreamingRFR1(PartialFitTests, unittest.TestCase):
- """
- Test SRFC with single estimator per chunk with "random forest style" max features. ie, subset.
-
- No limit on the total number of trees.
- """
-
- @classmethod
- def setUpClass(cls):
- """Set up model to test."""
- cls.n_samples = 1000
- cls.x, cls.y = sklearn.datasets.make_regression(n_samples=int(2e4),
- random_state=0,
- n_features=40)
-
- cls.mod = StreamingRFR(verbose=1,
- n_estimators_per_chunk=1,
- max_n_estimators=np.inf)
-
- # Set expected number of estimators
- cls.expected_n_estimators = 100
-
- # Set helper values
- super().setUpClass()
-
-
-class TestStreamingRFR2(PartialFitTests, unittest.TestCase):
- """
- Test SRFC with single estimator per chunk with "random forest style" max features. ie, subset.
-
- Total models limited to 39.
- """
-
- @classmethod
- def setUpClass(cls):
- """Set up model to test."""
- cls.n_samples = 1000
- cls.x, cls.y = sklearn.datasets.make_regression(n_samples=int(2e4),
- random_state=0,
- n_features=40)
-
- cls.mod = StreamingRFR(n_estimators_per_chunk=1,
- max_n_estimators=39)
-
- # Set expected number of estimators
- cls.expected_n_estimators = 39
-
- # Set helper values
- super().setUpClass()
-
-
-class TestStreamingRFR3(PartialFitTests, unittest.TestCase):
- """
- Test SRFC with multiple estimators per chunk with "random forest style" max features. ie, subset.
-
- No limit on total models, 3 estimators per row subset (each with different feature subset)
- """
-
- @classmethod
- def setUpClass(cls):
- """Set up model to test."""
- cls.n_samples = 1000
- cls.x, cls.y = sklearn.datasets.make_regression(n_samples=int(2e4),
- random_state=0,
- n_features=40)
-
- cls.mod = StreamingRFR(n_estimators_per_chunk=3,
- n_jobs=-1,
- max_n_estimators=np.inf)
-
- # Set expected number of estimators
- cls.expected_n_estimators = 300
-
- # Set helper values
- super().setUpClass()
-
-
-class TestStreamingRFR4(PartialFitTests, unittest.TestCase):
- """
- Test SRFC with single estimator per chunk with "decision tree style" max features. ie, all available to each tree.
-
- No limit on total models, 1 estimators per row subset.
- """
-
- @classmethod
- def setUpClass(cls):
- """Set up model to test."""
- cls.n_samples = 1000
- cls.x, cls.y = sklearn.datasets.make_regression(n_samples=int(2e4),
- random_state=0,
- n_features=40)
-
- cls.mod = StreamingRFR(n_estimators_per_chunk=1,
- max_features=cls.x.shape[1],
- max_n_estimators=np.inf)
-
- # Set expected number of estimators
- cls.expected_n_estimators = 100
-
- # Set helper values
- super().setUpClass()
-
-
-class TestStreamingRFR5(PartialFitTests, unittest.TestCase):
- """
- Test SRFC with single estimator per chunk with "decision tree style" max features. ie, all available to each tree.
-
- No limit on total models, 3 estimators per row subset.
- """
-
- @classmethod
- def setUpClass(cls):
- """Set up model to test."""
- cls.n_samples = 1000
- cls.x, cls.y = sklearn.datasets.make_regression(n_samples=int(2e4),
- random_state=0,
- n_features=40)
-
- cls.mod = StreamingRFR(n_estimators_per_chunk=3,
- n_jobs=-1,
- max_features=cls.x.shape[1],
- max_n_estimators=np.inf)
-
- # Set expected number of estimators
- cls.expected_n_estimators = 300
-
- # Set helper values
- super().setUpClass()
-
-
-class TestStreamingRFR6(PartialFitTests, unittest.TestCase):
- """
- Test SRFC with single estimator per chunk with "decision tree style" max features. ie, all available to each tree.
-
- No limit on total models, 3 estimators per row subset.
- """
-
- @classmethod
- def setUpClass(cls):
- """Set up model to test."""
- cls.n_samples = 1000
- cls.x, cls.y = sklearn.datasets.make_regression(n_samples=int(2e4),
- random_state=0,
- n_features=40)
-
- cls.mod = StreamingRFR(n_estimators_per_chunk=3,
- n_jobs=-1,
- max_features=cls.x.shape[1],
- max_n_estimators=np.inf)
-
- # Set expected number of estimators
- cls.expected_n_estimators = 300
-
- # Set helper values
- super().setUpClass()
-
-
-class TestStreamingRFR7(FitTests, unittest.TestCase):
- @classmethod
- def setUpClass(cls):
- """Set up model to test."""
-
- cls.spf_n_fits = 10
- cls.spf_sample_prop = 0.2
- cls.dask_feeding = False
- cls.n_estimators_per_sample = 1
-
- cls.mod = StreamingRFR(verbose=1,
- n_estimators_per_chunk=cls.n_estimators_per_sample,
- max_n_estimators=np.inf,
- dask_feeding=cls.dask_feeding,
- spf_sample_prop=cls.spf_sample_prop,
- spf_n_fits=cls.spf_n_fits)
-
- super().setUpClass()
-
-
-class TestStreamingRFR8(FitTests, unittest.TestCase):
- @classmethod
- def setUpClass(cls):
- """Set up model to test."""
- cls.spf_n_fits = 10
- cls.spf_sample_prop = 0.2
- cls.dask_feeding = False
- cls.n_estimators_per_sample = 10
-
- cls.mod = StreamingRFR(verbose=1,
- n_estimators_per_chunk=cls.n_estimators_per_sample,
- max_n_estimators=np.inf,
- dask_feeding=cls.dask_feeding,
- spf_sample_prop=cls.spf_sample_prop,
- spf_n_fits=cls.spf_n_fits)
-
- super().setUpClass()
-
-
-class TestStreamingRFR9(FitTests, unittest.TestCase):
- @classmethod
- def setUpClass(cls):
- """Set up model to test."""
-
- cls.spf_n_fits = 20
- cls.spf_sample_prop = 0.1
- cls.dask_feeding = False
- cls.n_estimators_per_sample = 6
-
- cls.mod = StreamingRFR(verbose=2,
- n_estimators_per_chunk=cls.n_estimators_per_sample,
- max_n_estimators=np.inf,
- dask_feeding=cls.dask_feeding,
- spf_sample_prop=cls.spf_sample_prop,
- spf_n_fits=cls.spf_n_fits)
-
- super().setUpClass()
-
-
-class TestStreamingEXTC1(PartialFitTests, unittest.TestCase):
- """
- Test SEXT with single estimator per chunk with "random forest style" max features. ie, subset.
-
- No limit on the total number of trees.
- """
-
- @classmethod
- def setUpClass(cls):
- """Set up model to test."""
- cls.n_samples = 1000
- cls.x, cls.y = sklearn.datasets.make_blobs(n_samples=int(2e4),
- random_state=0,
- n_features=40,
- centers=2,
- cluster_std=100)
-
- cls.mod = StreamingEXTC(n_estimators_per_chunk=1,
- max_n_estimators=np.inf)
-
- # Set expected number of estimators
- cls.expected_n_estimators = 100
-
- # Set helper values
- super().setUpClass()
-
-
-class TestStreamingEXTC2(PartialFitTests, unittest.TestCase):
- """
- Test SEXT with single estimator per chunk with "random forest style" max features. ie, subset.
-
- Total models limited to 39.
- """
-
- @classmethod
- def setUpClass(cls):
- """Set up model to test."""
- cls.n_samples = 1000
- cls.x, cls.y = sklearn.datasets.make_blobs(n_samples=int(2e4),
- random_state=0,
- n_features=40,
- centers=2,
- cluster_std=100)
-
- cls.mod = StreamingEXTC(n_estimators_per_chunk=1,
- max_n_estimators=39)
-
- # Set expected number of estimators
- cls.expected_n_estimators = 39
-
- # Set helper values
- super().setUpClass()
-
-
-class TestStreamingEXTC3(PartialFitTests, unittest.TestCase):
- """
- Test SEXT with multiple estimators per chunk with "random forest style" max features. ie, subset.
-
- No limit on total models, 3 estimators per row subset (each with different feature subset)
- """
-
- @classmethod
- def setUpClass(cls):
- """Set up model to test."""
- cls.n_samples = 1000
- cls.x, cls.y = sklearn.datasets.make_blobs(n_samples=int(2e4),
- random_state=0,
- n_features=40,
- centers=2,
- cluster_std=100)
-
- cls.mod = StreamingEXTC(n_estimators_per_chunk=3,
- n_jobs=-1,
- max_n_estimators=np.inf)
-
- # Set expected number of estimators
- cls.expected_n_estimators = 300
-
- # Set helper values
- super().setUpClass()
-
-
-class TestStreamingEXTC4(PartialFitTests, unittest.TestCase):
- """
- Test SEXT with single estimator per chunk with "decision tree style" max features. ie, all available to each tree.
-
- No limit on total models, 1 estimators per row subset.
- """
-
- @classmethod
- def setUpClass(cls):
- """Set up model to test."""
- cls.n_samples = 1000
- cls.x, cls.y = sklearn.datasets.make_blobs(n_samples=int(2e4),
- random_state=0,
- n_features=40,
- centers=2,
- cluster_std=100)
-
- cls.mod = StreamingEXTC(n_estimators_per_chunk=1,
- max_features=cls.x.shape[1],
- max_n_estimators=np.inf)
-
- # Set expected number of estimators
- cls.expected_n_estimators = 100
-
- # Set helper values
- super().setUpClass()
-
-
-class TestStreamingEXTC5(PartialFitTests, unittest.TestCase):
- """
- Test SEXT with single estimator per chunk with "decision tree style" max features. ie, all available to each tree.
-
- No limit on total models, 3 estimators per row subset.
- """
-
- @classmethod
- def setUpClass(cls):
- """Set up model to test."""
- cls.n_samples = 1000
- cls.x, cls.y = sklearn.datasets.make_blobs(n_samples=int(2e4),
- random_state=0,
- n_features=40,
- centers=2,
- cluster_std=100)
-
- cls.mod = StreamingEXTC(n_estimators_per_chunk=3,
- n_jobs=-1,
- max_features=cls.x.shape[1],
- max_n_estimators=np.inf)
-
- # Set expected number of estimators
- cls.expected_n_estimators = 300
-
- # Set helper values
- super().setUpClass()
-
-
-class TestStreamingEXTC6(PartialFitTests, unittest.TestCase):
- """
- Test SEXT with single estimator per chunk with "decision tree style" max features. ie, all available to each tree.
-
- No limit on total models, 3 estimators per row subset.
- """
-
- @classmethod
- def setUpClass(cls):
- """Set up model to test."""
- cls.n_samples = 1000
- cls.x, cls.y = sklearn.datasets.make_blobs(n_samples=int(2e4),
- random_state=0,
- n_features=40,
- centers=2,
- cluster_std=100)
-
- cls.mod = StreamingEXTC(n_estimators_per_chunk=3,
- n_jobs=-1,
- max_features=cls.x.shape[1],
- max_n_estimators=np.inf)
-
- # Set expected number of estimators
- cls.expected_n_estimators = 300
-
- # Set helper values
- super().setUpClass()
-
-
-class TestStreamingEXTC7(FitTests, unittest.TestCase):
- @classmethod
- def setUpClass(cls):
- """Set up model to test."""
-
- cls.spf_n_fits = 10
- cls.spf_sample_prop = 0.1
- cls.dask_feeding = False
- cls.n_estimators_per_sample = 1
-
- cls.mod = StreamingEXTC(verbose=1,
- n_estimators_per_chunk=cls.n_estimators_per_sample,
- max_n_estimators=np.inf,
- dask_feeding=cls.dask_feeding,
- spf_sample_prop=cls.spf_sample_prop,
- spf_n_fits=cls.spf_n_fits)
-
- super().setUpClass()
-
-
-class TestStreamingEXTC8(FitTests, unittest.TestCase):
- @classmethod
- def setUpClass(cls):
- """Set up model to test."""
- cls.spf_n_fits = 10
- cls.spf_sample_prop = 0.1
- cls.dask_feeding = False
- cls.n_estimators_per_sample = 10
-
- cls.mod = StreamingEXTC(verbose=1,
- n_estimators_per_chunk=cls.n_estimators_per_sample,
- max_n_estimators=np.inf,
- dask_feeding=cls.dask_feeding,
- spf_sample_prop=cls.spf_sample_prop,
- spf_n_fits=cls.spf_n_fits)
-
- super().setUpClass()
-
-
-class TestStreamingEXTC9(FitTests, unittest.TestCase):
- @classmethod
- def setUpClass(cls):
- """Set up model to test."""
-
- cls.spf_n_fits = 20
- cls.spf_sample_prop = 0.1
- cls.dask_feeding = False
- cls.n_estimators_per_sample = 6
-
- cls.mod = StreamingEXTC(verbose=1,
- n_estimators_per_chunk=cls.n_estimators_per_sample,
- max_n_estimators=np.inf,
- dask_feeding=cls.dask_feeding,
- spf_sample_prop=cls.spf_sample_prop,
- spf_n_fits=cls.spf_n_fits)
-
- super().setUpClass()
-
-
-class TestStreamingEXTR1(PartialFitTests, unittest.TestCase):
- """
- Test SEXT with single estimator per chunk with "random forest style" max features. ie, subset.
-
- No limit on the total number of trees.
- """
-
- @classmethod
- def setUpClass(cls):
- """Set up model to test."""
- cls.n_samples = 1000
- cls.x, cls.y = sklearn.datasets.make_regression(n_samples=int(2e4),
- random_state=0,
- n_features=40)
-
- cls.mod = StreamingEXTR(n_estimators_per_chunk=1,
- max_n_estimators=np.inf)
-
- # Set expected number of estimators
- cls.expected_n_estimators = 100
-
- # Set helper values
- super().setUpClass()
-
-
-class TestStreamingEXTR2(PartialFitTests, unittest.TestCase):
- """
- Test SEXT with single estimator per chunk with "random forest style" max features. ie, subset.
-
- Total models limited to 39.
- """
-
- @classmethod
- def setUpClass(cls):
- """Set up model to test."""
- cls.n_samples = 1000
- cls.x, cls.y = sklearn.datasets.make_regression(n_samples=int(2e4),
- random_state=0,
- n_features=400)
-
- cls.mod = StreamingEXTR(n_estimators_per_chunk=1,
- max_n_estimators=39)
-
- # Set expected number of estimators
- cls.expected_n_estimators = 39
-
- # Set helper values
- super().setUpClass()
-
-
-class TestStreamingEXTR3(PartialFitTests, unittest.TestCase):
- """
- Test SEXT with multiple estimators per chunk with "random forest style" max features. ie, subset.
-
- No limit on total models, 3 estimators per row subset (each with different feature subset)
- """
-
- @classmethod
- def setUpClass(cls):
- """Set up model to test."""
- cls.n_samples = 1000
- cls.x, cls.y = sklearn.datasets.make_regression(n_samples=int(2e4),
- random_state=0,
- n_features=40)
-
- cls.mod = StreamingEXTR(n_estimators_per_chunk=3,
- n_jobs=-1,
- max_n_estimators=np.inf)
-
- # Set expected number of estimators
- cls.expected_n_estimators = 300
-
- # Set helper values
- super().setUpClass()
-
-
-class TestStreamingEXTR4(PartialFitTests, unittest.TestCase):
- """
- Test SEXT with single estimator per chunk with "decision tree style" max features. ie, all available to each tree.
-
- No limit on total models, 1 estimators per row subset.
- """
-
- @classmethod
- def setUpClass(cls):
- """Set up model to test."""
- cls.n_samples = 1000
- cls.x, cls.y = sklearn.datasets.make_regression(n_samples=int(2e4),
- random_state=0,
- n_features=4)
-
- cls.mod = StreamingEXTR(n_estimators_per_chunk=1,
- max_features=cls.x.shape[1],
- max_n_estimators=np.inf)
-
- # Set expected number of estimators
- cls.expected_n_estimators = 100
-
- # Set helper values
- super().setUpClass()
-
-
-class TestStreamingEXTR5(PartialFitTests, unittest.TestCase):
- """
- Test SEXT with single estimator per chunk with "decision tree style" max features. ie, all available to each tree.
-
- No limit on total models, 3 estimators per row subset.
- """
-
- @classmethod
- def setUpClass(cls):
- """Set up model to test."""
- cls.n_samples = 1000
- cls.x, cls.y = sklearn.datasets.make_regression(n_samples=int(2e4),
- random_state=0,
- n_features=40)
-
- cls.mod = StreamingEXTR(n_estimators_per_chunk=3,
- n_jobs=-1,
- max_features=cls.x.shape[1],
- max_n_estimators=np.inf)
-
- # Set expected number of estimators
- cls.expected_n_estimators = 300
-
- # Set helper values
- super().setUpClass()
-
-
-class TestStreamingEXTR6(PartialFitTests, unittest.TestCase):
- """
- Test SEXT with single estimator per chunk with "decision tree style" max features. ie, all available to each tree.
-
- No limit on total models, 3 estimators per row subset.
- """
-
- @classmethod
- def setUpClass(cls):
- """Set up model to test."""
- cls.n_samples = 1000
- cls.x, cls.y = sklearn.datasets.make_regression(n_samples=int(2e4),
- random_state=0,
- n_features=40)
-
- cls.mod = StreamingEXTR(n_estimators_per_chunk=3,
- n_jobs=-1,
- max_features=cls.x.shape[1],
- max_n_estimators=np.inf)
-
- # Set expected number of estimators
- cls.expected_n_estimators = 300
-
- # Set helper values
- super().setUpClass()
-
-
-class TestStreamingEXTR7(FitTests, unittest.TestCase):
- @classmethod
- def setUpClass(cls):
- """Set up model to test."""
-
- cls.spf_n_fits = 10
- cls.spf_sample_prop = 0.1
- cls.dask_feeding = False
- cls.n_estimators_per_sample = 1
-
- cls.mod = StreamingEXTR(verbose=1,
- n_estimators_per_chunk=cls.n_estimators_per_sample,
- max_n_estimators=np.inf,
- dask_feeding=cls.dask_feeding,
- spf_sample_prop=cls.spf_sample_prop,
- spf_n_fits=cls.spf_n_fits)
-
- super().setUpClass()
-
-
-class TestStreamingEXTR8(FitTests, unittest.TestCase):
- @classmethod
- def setUpClass(cls):
- """Set up model to test."""
- cls.spf_n_fits = 10
- cls.spf_sample_prop = 0.1
- cls.dask_feeding = False
- cls.n_estimators_per_sample = 10
-
- cls.mod = StreamingEXTR(verbose=1,
- n_estimators_per_chunk=cls.n_estimators_per_sample,
- max_n_estimators=np.inf,
- dask_feeding=cls.dask_feeding,
- spf_sample_prop=cls.spf_sample_prop,
- spf_n_fits=cls.spf_n_fits)
-
- super().setUpClass()
-
-
-class TestStreamingEXTR9(FitTests, unittest.TestCase):
- @classmethod
- def setUpClass(cls):
- """Set up model to test."""
-
- cls.spf_n_fits = 20
- cls.spf_sample_prop = 0.1
- cls.dask_feeding = False
- cls.n_estimators_per_sample = 6
-
- cls.mod = StreamingEXTR(verbose=1,
- n_estimators_per_chunk=cls.n_estimators_per_sample,
- max_n_estimators=np.inf,
- dask_feeding=cls.dask_feeding,
- spf_sample_prop=cls.spf_sample_prop,
- spf_n_fits=cls.spf_n_fits)
-
- super().setUpClass()
diff --git a/tests/integration/incremental_trees/test_trees_dask.py b/tests/integration/incremental_trees/test_trees_dask.py
deleted file mode 100644
index 123313e..0000000
--- a/tests/integration/incremental_trees/test_trees_dask.py
+++ /dev/null
@@ -1,294 +0,0 @@
-import unittest
-
-import numpy as np
-from dask_ml.wrappers import Incremental
-
-from incremental_trees.models.classification.streaming_extc import StreamingEXTC
-from incremental_trees.models.regression.streaming_extr import StreamingEXTR
-from incremental_trees.models.regression.streaming_rfr import StreamingRFR
-from incremental_trees.trees import StreamingRFC
-from tests.integration.base import DaskTests
-
-
-class TestDaskModel1(DaskTests, unittest.TestCase):
- @classmethod
- def setUpClass(cls):
- """Set up model to test."""
- cls = cls._prep_data(cls)
- cls.mod = Incremental(StreamingEXTC(n_estimators_per_chunk=1,
- max_n_estimators=39,
- verbose=1))
-
- # Set expected number of estimators
- # This should be set manually depending on data.
- cls.expected_n_estimators = 10
-
- # Set helper values
- super().setUpClass()
-
-
-class TestDaskModel2(DaskTests, unittest.TestCase):
- @classmethod
- def setUpClass(cls):
- """Set up model to test."""
- cls = cls._prep_data(cls)
- cls.mod = Incremental(StreamingEXTC(n_estimators_per_chunk=2,
- n_jobs=-1,
- max_n_estimators=np.inf,
- verbose=1))
-
- # Set expected number of estimators
- cls.expected_n_estimators = 20
-
- # Set helper values
- super().setUpClass()
-
-
-class TestDaskModel3(DaskTests, unittest.TestCase):
- @classmethod
- def setUpClass(cls):
- """Set up model to test."""
- cls = cls._prep_data(cls)
- cls.mod = Incremental(StreamingEXTC(n_estimators_per_chunk=20,
- n_jobs=-1,
- max_n_estimators=np.inf,
- verbose=1))
-
- # Set expected number of estimators
- cls.expected_n_estimators = 200
-
- # Set helper values
- super().setUpClass()
-
-
-class TestDaskModel4(DaskTests, unittest.TestCase):
- @classmethod
- def setUpClass(cls):
- """Set up model to test."""
- cls = cls._prep_data(cls)
- cls.mod = Incremental(StreamingEXTC(n_estimators_per_chunk=1,
- n_jobs=-1,
- max_n_estimators=np.inf,
- max_features=cls.x.shape[1],
- verbose=1))
-
- # Set expected number of estimators
- cls.expected_n_estimators = 10
-
- # Set helper values
- super().setUpClass()
-
-
-class TestDaskModel5(DaskTests, unittest.TestCase):
- @classmethod
- def setUpClass(cls):
- """Set up model to test."""
- cls = cls._prep_data(cls,
- reg=True)
- cls.mod = Incremental(StreamingEXTR(n_estimators_per_chunk=1,
- max_n_estimators=39,
- verbose=1))
-
- # Set expected number of estimators
- # This should be set manually depending on data.
- cls.expected_n_estimators = 10
-
- # Set helper values
- super().setUpClass()
-
-
-class TestDaskModel6(DaskTests, unittest.TestCase):
- @classmethod
- def setUpClass(cls):
- """Set up model to test."""
- cls = cls._prep_data(cls,
- reg=True)
- cls.mod = Incremental(StreamingEXTR(n_estimators_per_chunk=2,
- n_jobs=-1,
- max_n_estimators=np.inf,
- verbose=1))
-
- # Set expected number of estimators
- cls.expected_n_estimators = 20
-
- # Set helper values
- super().setUpClass()
-
-
-class TestDaskModel7(DaskTests, unittest.TestCase):
- @classmethod
- def setUpClass(cls):
- """Set up model to test."""
- cls = cls._prep_data(cls,
- reg=True)
- cls.mod = Incremental(StreamingEXTR(n_estimators_per_chunk=4,
- n_jobs=-1,
- max_n_estimators=np.inf,
- verbose=1))
-
- # Set expected number of estimators
- cls.expected_n_estimators = 40
-
- # Set helper values
- super().setUpClass()
-
-
-class TestDaskModel8(DaskTests, unittest.TestCase):
- @classmethod
- def setUpClass(cls):
- """Set up model to test."""
- cls = cls._prep_data(cls,
- reg=True)
- cls.mod = Incremental(StreamingEXTR(n_estimators_per_chunk=1,
- n_jobs=-1,
- max_n_estimators=np.inf,
- max_features=cls.x.shape[1],
- verbose=1))
-
- # Set expected number of estimators
- cls.expected_n_estimators = 10
-
- # Set helper values
- super().setUpClass()
-
-
-class TestDaskRFC1(DaskTests, unittest.TestCase):
- @classmethod
- def setUpClass(cls):
- """Set up model to test."""
- cls = cls._prep_data(cls)
- cls.mod = Incremental(StreamingRFC(n_estimators_per_chunk=1,
- max_n_estimators=39,
- verbose=1))
-
- # Set expected number of estimators
- # This should be set manually depending on data.
- cls.expected_n_estimators = 10
-
- # Set helper values
- super().setUpClass()
-
-
-class TestDaskRFC2(DaskTests, unittest.TestCase):
- @classmethod
- def setUpClass(cls):
- """Set up model to test."""
- cls = cls._prep_data(cls)
- cls.mod = Incremental(StreamingRFC(n_estimators_per_chunk=2,
- n_jobs=-1,
- max_n_estimators=np.inf,
- verbose=1))
-
- # Set expected number of estimators
- cls.expected_n_estimators = 20
-
- # Set helper values
- super().setUpClass()
-
-
-class TestDaskRFC3(DaskTests, unittest.TestCase):
- @classmethod
- def setUpClass(cls):
- """Set up model to test."""
- cls = cls._prep_data(cls)
- cls.mod = Incremental(StreamingRFC(n_estimators_per_chunk=20,
- n_jobs=-1,
- max_n_estimators=np.inf,
- verbose=1))
-
- # Set expected number of estimators
- cls.expected_n_estimators = 200
-
- # Set helper values
- super().setUpClass()
-
-
-class TestDaskRFC4(DaskTests, unittest.TestCase):
- @classmethod
- def setUpClass(cls):
- """Set up model to test."""
- cls = cls._prep_data(cls)
- cls.mod = Incremental(StreamingRFC(n_estimators_per_chunk=1,
- n_jobs=-1,
- max_n_estimators=np.inf,
- max_features=cls.x.shape[1],
- verbose=1))
-
- # Set expected number of estimators
- cls.expected_n_estimators = 10
-
- # Set helper values
- super().setUpClass()
-
-
-class TestDaskRFR1(DaskTests, unittest.TestCase):
- @classmethod
- def setUpClass(cls):
- """Set up model to test."""
- cls = cls._prep_data(cls,
- reg=True)
- cls.mod = Incremental(StreamingRFR(n_estimators_per_chunk=1,
- max_n_estimators=39,
- verbose=1))
-
- # Set expected number of estimators
- # This should be set manually depending on data.
- cls.expected_n_estimators = 10
-
- # Set helper values
- super().setUpClass()
-
-
-class TestDaskRFR2(DaskTests, unittest.TestCase):
- @classmethod
- def setUpClass(cls):
- """Set up model to test."""
- cls = cls._prep_data(cls,
- reg=True)
- cls.mod = Incremental(StreamingRFR(n_estimators_per_chunk=2,
- n_jobs=-1,
- max_n_estimators=np.inf,
- verbose=1))
-
- # Set expected number of estimators
- cls.expected_n_estimators = 20
-
- # Set helper values
- super().setUpClass()
-
-
-class TestDaskRFR3(DaskTests, unittest.TestCase):
- @classmethod
- def setUpClass(cls):
- """Set up model to test."""
- cls = cls._prep_data(cls,
- reg=True)
- cls.mod = Incremental(StreamingRFR(n_estimators_per_chunk=20,
- n_jobs=-1,
- max_n_estimators=np.inf,
- verbose=1))
-
- # Set expected number of estimators
- cls.expected_n_estimators = 200
-
- # Set helper values
- super().setUpClass()
-
-
-class TestDaskRFR4(DaskTests, unittest.TestCase):
- @classmethod
- def setUpClass(cls):
- """Set up model to test."""
- cls = cls._prep_data(cls,
- reg=True)
- cls.mod = Incremental(StreamingRFR(n_estimators_per_chunk=1,
- n_jobs=-1,
- max_n_estimators=np.inf,
- max_features=cls.x.shape[1],
- verbose=1))
-
- # Set expected number of estimators
- cls.expected_n_estimators = 10
-
- # Set helper values
- super().setUpClass()
diff --git a/tests/integration/regression/__init__.py b/tests/integration/regression/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/integration/regression/test_streaming_extr.py b/tests/integration/regression/test_streaming_extr.py
new file mode 100644
index 0000000..31f07f2
--- /dev/null
+++ b/tests/integration/regression/test_streaming_extr.py
@@ -0,0 +1,199 @@
+import numpy as np
+from dask_ml.wrappers import Incremental
+from sklearn import datasets
+
+from incremental_trees.models.regression.streaming_extr import StreamingEXTR
+from tests.integration.base.dask_test_base import DaskTestBase
+from tests.integration.base.fit_test_base import FitTestBase
+from tests.integration.base.partial_fit_test_base import PartialFitTestBase
+
+
+class TestStreamingEXTRWithPartialFitsUnlimitedEstimators(PartialFitTestBase):
+ """
+ Test SEXT with single estimator per chunk with "random forest style" max features. ie, subset.
+
+ No limit on the total number of trees.
+ """
+
+ @classmethod
+ def setUpClass(cls):
+ cls.n_samples = 1000
+ cls.x, cls.y = datasets.make_regression(n_samples=int(2e4), random_state=0, n_features=40)
+ cls.mod = StreamingEXTR(n_estimators_per_chunk=1, max_n_estimators=np.inf)
+ cls.expected_n_estimators = 100
+
+ super().setUpClass()
+
+
+class TestStreamingEXTRWithPartialFitsLimitedEstimators(PartialFitTestBase):
+ """
+ Test SEXT with single estimator per chunk with "random forest style" max features. ie, subset.
+
+ Total models limited to 39.
+ """
+
+ @classmethod
+ def setUpClass(cls):
+ cls.n_samples = 1000
+ cls.x, cls.y = datasets.make_regression(n_samples=int(2e4), random_state=0, n_features=400)
+ cls.mod = StreamingEXTR(n_estimators_per_chunk=1, max_n_estimators=39)
+ cls.expected_n_estimators = 39
+
+ super().setUpClass()
+
+
+class TestStreamingEXTRWithPartialFitsMultipleEstimatorsPerChunk(PartialFitTestBase):
+ """
+ Test SEXT with multiple estimators per chunk with "random forest style" max features. ie, subset.
+
+ No limit on total models, 3 estimators per row subset (each with different feature subset)
+ """
+
+ @classmethod
+ def setUpClass(cls):
+ cls.n_samples = 1000
+ cls.x, cls.y = datasets.make_regression(n_samples=int(2e4), random_state=0, n_features=40)
+ cls.mod = StreamingEXTR(n_estimators_per_chunk=3, n_jobs=-1, max_n_estimators=np.inf)
+ cls.expected_n_estimators = 300
+
+ super().setUpClass()
+
+
+class TestStreamingEXTRWithPartialFitsAllFeatures(PartialFitTestBase):
+ """
+ Test SEXT with single estimator per chunk with "decision tree style" max features. ie, all available to each tree.
+
+ No limit on total models, 1 estimators per row subset.
+ """
+
+ @classmethod
+ def setUpClass(cls):
+ cls.n_samples = 1000
+ cls.x, cls.y = datasets.make_regression(n_samples=int(2e4), random_state=0, n_features=4)
+ cls.mod = StreamingEXTR(n_estimators_per_chunk=1, max_features=cls.x.shape[1], max_n_estimators=np.inf)
+ cls.expected_n_estimators = 100
+
+ super().setUpClass()
+
+
+class TestStreamingEXTRWithPartialFitsMultipleEstimatorsPerChunkAllFeatures(PartialFitTestBase):
+ """
+ Test SEXTR with multiple estimators per chunk with "decision tree style" max features. ie, all available to each tree.
+
+ No limit on total models, 3 estimators per row subset.
+ """
+
+ @classmethod
+ def setUpClass(cls):
+ cls.n_samples = 1000
+ cls.x, cls.y = datasets.make_regression(n_samples=int(2e4),
+ random_state=0,
+ n_features=40)
+ cls.mod = StreamingEXTR(n_estimators_per_chunk=3,
+ n_jobs=-1,
+ max_features=cls.x.shape[1],
+ max_n_estimators=np.inf)
+ cls.expected_n_estimators = 300
+
+ super().setUpClass()
+
+
+class TestStreamingEXTRWithFitSingleEstimatorPerChunk(FitTestBase):
+ @classmethod
+ def setUpClass(cls):
+ cls.spf_n_fits = 10
+ cls.n_estimators_per_sample = 1
+ cls.mod = StreamingEXTR(
+ verbose=1,
+ n_estimators_per_chunk=cls.n_estimators_per_sample,
+ max_n_estimators=np.inf,
+ dask_feeding=cls.dask_feeding,
+ spf_sample_prop=cls.spf_sample_prop,
+ spf_n_fits=cls.spf_n_fits
+ )
+
+ super().setUpClass()
+
+
+class TestStreamingEXTRWithFitMultipleEstimatorsPerChunk(FitTestBase):
+ @classmethod
+ def setUpClass(cls):
+ cls.spf_n_fits = 10
+ cls.n_estimators_per_sample = 10
+ cls.mod = StreamingEXTR(
+ verbose=1,
+ n_estimators_per_chunk=cls.n_estimators_per_sample,
+ max_n_estimators=np.inf,
+ dask_feeding=cls.dask_feeding,
+ spf_sample_prop=cls.spf_sample_prop,
+ spf_n_fits=cls.spf_n_fits
+ )
+
+ super().setUpClass()
+
+
+class TestStreamingEXTRWithFitAdditionalSteps(FitTestBase):
+ @classmethod
+ def setUpClass(cls):
+ cls.spf_n_fits = 20
+ cls.n_estimators_per_sample = 6
+ cls.mod = StreamingEXTR(
+ verbose=1,
+ n_estimators_per_chunk=cls.n_estimators_per_sample,
+ max_n_estimators=np.inf,
+ dask_feeding=cls.dask_feeding,
+ spf_sample_prop=cls.spf_sample_prop,
+ spf_n_fits=cls.spf_n_fits
+ )
+
+ super().setUpClass()
+
+
+class TestDaskEXTRWithDask(DaskTestBase):
+ @classmethod
+ def setUpClass(cls):
+ cls._prep_data(cls, reg=True)
+ cls.mod = Incremental(StreamingEXTR(n_estimators_per_chunk=1, max_n_estimators=39, verbose=1))
+ cls.expected_n_estimators = 10
+
+ super().setUpClass()
+
+
+class TestDaskEXTRWithDaskMultipleEstimatorsPerChunk(DaskTestBase):
+ @classmethod
+ def setUpClass(cls):
+ cls._prep_data(cls, reg=True)
+ cls.mod = Incremental(StreamingEXTR(n_estimators_per_chunk=2, n_jobs=-1, max_n_estimators=np.inf, verbose=1))
+ cls.expected_n_estimators = 20
+
+ super().setUpClass()
+
+
+class TestDaskEXTRWithDaskManyEstimatorsPerChunk(DaskTestBase):
+ @classmethod
+ def setUpClass(cls):
+ cls._prep_data(cls, reg=True)
+ cls.mod = Incremental(StreamingEXTR(n_estimators_per_chunk=20, n_jobs=-1, max_n_estimators=np.inf, verbose=1))
+ cls.expected_n_estimators = 200
+
+ super().setUpClass()
+
+
+class TestDaskEXTRWithDaskAlLFeatures(DaskTestBase):
+ @classmethod
+ def setUpClass(cls):
+ cls._prep_data(cls, reg=True)
+ cls.mod = Incremental(
+ StreamingEXTR(
+ n_estimators_per_chunk=1, n_jobs=-1,
+ max_n_estimators=np.inf,
+ max_features=cls.x.shape[1],
+ verbose=1
+ )
+ )
+ cls.expected_n_estimators = 10
+
+ super().setUpClass()
+
+
+del FitTestBase, PartialFitTestBase, DaskTestBase
diff --git a/tests/integration/regression/test_streaming_rfr.py b/tests/integration/regression/test_streaming_rfr.py
new file mode 100644
index 0000000..05d813e
--- /dev/null
+++ b/tests/integration/regression/test_streaming_rfr.py
@@ -0,0 +1,207 @@
+import numpy as np
+from dask_ml.wrappers import Incremental
+from sklearn import datasets
+
+from incremental_trees.models.regression.streaming_rfr import StreamingRFR
+from tests.integration.base.dask_test_base import DaskTestBase
+from tests.integration.base.fit_test_base import FitTestBase
+from tests.integration.base.partial_fit_test_base import PartialFitTestBase
+
+
+class TestStreamingRFRWithPartialFitsUnlimitedEstimators(PartialFitTestBase):
+ """
+ Test SRFR with single estimator per chunk with "random forest style" max features. ie, subset.
+
+ No limit on the total number of trees.
+ """
+
+ @classmethod
+ def setUpClass(cls):
+ """Set up model to test."""
+ cls.n_samples = 1000
+ cls.x, cls.y = datasets.make_regression(n_samples=int(2e4), random_state=0, n_features=40)
+ cls.mod = StreamingRFR(verbose=1, n_estimators_per_chunk=1, max_n_estimators=np.inf)
+ cls.expected_n_estimators = 100
+
+ super().setUpClass()
+
+
+class TestStreamingRFRWithPartialFitsLimitedEstimators(PartialFitTestBase):
+ """
+ Test SRFR with single estimator per chunk with "random forest style" max features. ie, subset.
+
+ Total models limited to 39.
+ """
+
+ @classmethod
+ def setUpClass(cls):
+ """Set up model to test."""
+ cls.n_samples = 1000
+ cls.x, cls.y = datasets.make_regression(n_samples=int(2e4), random_state=0, n_features=40)
+ cls.mod = StreamingRFR(n_estimators_per_chunk=1, max_n_estimators=39)
+ cls.expected_n_estimators = 39
+
+ super().setUpClass()
+
+
+class TestStreamingRFRWithPartialFitsMultipleEstimatorsPerChunk(PartialFitTestBase):
+ """
+ Test SRFR with multiple estimators per chunk with "random forest style" max features. ie, subset.
+
+ No limit on total models, 3 estimators per row subset (each with different feature subset)
+ """
+
+ @classmethod
+ def setUpClass(cls):
+ """Set up model to test."""
+ cls.n_samples = 1000
+ cls.x, cls.y = datasets.make_regression(n_samples=int(2e4), random_state=0, n_features=40)
+ cls.mod = StreamingRFR(n_estimators_per_chunk=3, n_jobs=-1, max_n_estimators=np.inf)
+ cls.expected_n_estimators = 300
+
+ super().setUpClass()
+
+
+class TestStreamingRFRWithPartialFitsAllFeatures(PartialFitTestBase):
+ """
+ Test SRFR with single estimator per chunk with "decision tree style" max features. ie, all available to each tree.
+
+ No limit on total models, 1 estimators per row subset.
+ """
+
+ @classmethod
+ def setUpClass(cls):
+ """Set up model to test."""
+ cls.n_samples = 1000
+ cls.x, cls.y = datasets.make_regression(n_samples=int(2e4), random_state=0, n_features=40)
+ cls.mod = StreamingRFR(n_estimators_per_chunk=1, max_features=cls.x.shape[1], max_n_estimators=np.inf)
+ cls.expected_n_estimators = 100
+
+ super().setUpClass()
+
+
+class TestStreamingRFRWithPartialFitsMultipleEstimatorsPerChunkAllFeatures(PartialFitTestBase):
+ """
+ Test SRFR with multiple estimators per chunk with "decision tree style" max features. ie, all available to each tree.
+
+ No limit on total models, 3 estimators per row subset.
+ """
+
+ @classmethod
+ def setUpClass(cls):
+ cls.n_samples = 1000
+ cls.x, cls.y = datasets.make_regression(n_samples=int(2e4), random_state=0, n_features=40)
+ cls.mod = StreamingRFR(
+ n_estimators_per_chunk=3,
+ n_jobs=-1,
+ max_features=cls.x.shape[1],
+ max_n_estimators=np.inf
+ )
+ cls.expected_n_estimators = 300
+
+ super().setUpClass()
+
+
+class TestStreamingEXTRWithFitSingleEstimatorPerChunk(FitTestBase):
+ @classmethod
+ def setUpClass(cls):
+ cls.spf_n_fits = 10
+ cls.n_estimators_per_sample = 1
+
+ cls.mod = StreamingRFR(
+ verbose=1,
+ n_estimators_per_chunk=cls.n_estimators_per_sample,
+ max_n_estimators=np.inf,
+ dask_feeding=cls.dask_feeding,
+ spf_sample_prop=cls.spf_sample_prop,
+ spf_n_fits=cls.spf_n_fits
+ )
+
+ super().setUpClass()
+
+
+class TestStreamingEXTRWithFitMultipleEstimatorsPerChunk(FitTestBase):
+ @classmethod
+ def setUpClass(cls):
+ """Set up the StreamingRFR model under test (NOTE: the class name says EXTR)."""
+ cls.spf_n_fits = 10
+ cls.n_estimators_per_sample = 10
+ cls.mod = StreamingRFR(
+ verbose=1,
+ n_estimators_per_chunk=cls.n_estimators_per_sample,
+ max_n_estimators=np.inf,
+ dask_feeding=cls.dask_feeding,
+ spf_sample_prop=cls.spf_sample_prop,
+ spf_n_fits=cls.spf_n_fits
+ )
+
+ super().setUpClass()
+
+
+class TestStreamingEXTRWithFitAdditionalSteps(FitTestBase):
+ @classmethod
+ def setUpClass(cls):
+ """Set up the StreamingRFR model under test (NOTE: the class name says EXTR)."""
+
+ cls.spf_n_fits = 20
+ cls.n_estimators_per_sample = 6
+ cls.mod = StreamingRFR(
+ verbose=2,
+ n_estimators_per_chunk=cls.n_estimators_per_sample,
+ max_n_estimators=np.inf,
+ dask_feeding=cls.dask_feeding,
+ spf_sample_prop=cls.spf_sample_prop,
+ spf_n_fits=cls.spf_n_fits
+ )
+
+ super().setUpClass()
+
+
+class TestDaskRFRWithDask(DaskTestBase):
+ @classmethod
+ def setUpClass(cls):
+ cls._prep_data(cls, reg=True)
+ cls.mod = Incremental(StreamingRFR(n_estimators_per_chunk=1, max_n_estimators=39, verbose=1))
+ cls.expected_n_estimators = 10
+
+ super().setUpClass()
+
+
+class TestDaskRFRWithDaskMultipleEstimatorsPerChunk(DaskTestBase):
+ @classmethod
+ def setUpClass(cls):
+ cls._prep_data(cls, reg=True)
+ cls.mod = Incremental(StreamingRFR(n_estimators_per_chunk=2, n_jobs=-1, max_n_estimators=np.inf, verbose=1))
+ cls.expected_n_estimators = 20
+
+ super().setUpClass()
+
+
+class TestDaskRFRWithDaskManyEstimatorsPerChunk(DaskTestBase):
+ @classmethod
+ def setUpClass(cls):
+ cls._prep_data(cls, reg=True)
+ cls.mod = Incremental(StreamingRFR(n_estimators_per_chunk=20, n_jobs=-1, max_n_estimators=np.inf, verbose=1))
+ cls.expected_n_estimators = 200
+
+ super().setUpClass()
+
+
+class TestDaskRFRWithDaskAlLFeatures(DaskTestBase):
+ @classmethod
+ def setUpClass(cls):
+ cls._prep_data(cls, reg=True)
+ cls.mod = Incremental(
+ StreamingRFR(
+ n_estimators_per_chunk=1, n_jobs=-1,
+ max_n_estimators=np.inf,
+ max_features=cls.x.shape[1],
+ verbose=1
+ )
+ )
+ cls.expected_n_estimators = 10
+
+ super().setUpClass()
+
+
+del FitTestBase, PartialFitTestBase, DaskTestBase
diff --git a/tests/performance/__init__.py b/tests/performance/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/integration/incremental_trees/test_trees_benchmarks.py b/tests/performance/test_benchmarks.py
similarity index 93%
rename from tests/integration/incremental_trees/test_trees_benchmarks.py
rename to tests/performance/test_benchmarks.py
index 823b559..989dc0f 100644
--- a/tests/integration/incremental_trees/test_trees_benchmarks.py
+++ b/tests/performance/test_benchmarks.py
@@ -2,11 +2,11 @@
import numpy as np
from distributed import LocalCluster, Client
+from incremental_trees.models.classification.streaming_rfc import StreamingRFC
from sklearn import clone
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
-from incremental_trees.trees import StreamingRFC
from tests.common.data_fixture import DataFixture
@@ -42,12 +42,12 @@ def _fit_benchmarks(self):
self.rfc.fit(self.x_train, self.y_train)
self.rfc_once.fit(self.x_train, self.y_train)
- self.log_reg_report, self.log_reg_train_auc, self.log_reg_test_auc = self._mod_report(self,
- mod=self.log_reg)
- self.rfc_report, self.rfc_train_auc, self.rfc_test_auc = self._mod_report(self,
- mod=self.rfc)
- self.rfc_once_report, self.rfc_once_train_auc, self.rfc_once_test_auc = self._mod_report(self,
- mod=self.rfc_once)
+ self.log_reg_report, self.log_reg_train_auc, self.log_reg_test_auc = self._mod_report(self, mod=self.log_reg)
+ self.rfc_report, self.rfc_train_auc, self.rfc_test_auc = self._mod_report(self, mod=self.rfc)
+ self.rfc_once_report, self.rfc_once_train_auc, self.rfc_once_test_auc = self._mod_report(
+ self,
+ mod=self.rfc_once
+ )
return self
@@ -60,14 +60,14 @@ def _assert_same_n_rows(self):
# Will be available in actual test.
n_rows = self.x_train.shape[0]
- self.assertEqual(self.rfc_n_estimators * n_rows,
- (self.srfc_n_estimators_per_chunk *
- self.srfc_n_partial_fit_calls *
- int(n_rows / self.srfc_n_partial_fit_calls)))
+ self.assertEqual(
+ self.rfc_n_estimators * n_rows,
+ (self.srfc_n_estimators_per_chunk *
+ self.srfc_n_partial_fit_calls *
+ int(n_rows / self.srfc_n_partial_fit_calls))
+ )
- def _fit_srfc(self,
- sequential: bool = True,
- n_prop: float = 0.1) -> StreamingRFC:
+ def _fit_srfc(self, sequential: bool = True, n_prop: float = 0.1) -> StreamingRFC:
"""
Fit the streaming RFC. Total number of rows used in training varies depending on sequential.
@@ -123,7 +123,7 @@ def _fit_with_dask(self):
n_workers=2,
threads_per_worker=2,
scheduler_port=8080,
- diagnostics_port=8081) as cluster, Client(cluster) as client:
+ diagnostics_port=8081) as cluster, Client(cluster):
self.srfc_dask.fit(self.x_train, self.y_train)
def _fit_with_spf(self):
diff --git a/tests/integration/incremental_trees/test_trees_grids.py b/tests/performance/test_grids.py
similarity index 93%
rename from tests/integration/incremental_trees/test_trees_grids.py
rename to tests/performance/test_grids.py
index 4886558..9ddb7fc 100644
--- a/tests/integration/incremental_trees/test_trees_grids.py
+++ b/tests/performance/test_grids.py
@@ -1,5 +1,3 @@
-# TODO: These tests aren't finished. Need to generalise, add EXTC, regressors, etc.
-
import unittest
import warnings
@@ -7,18 +5,14 @@
from sklearn.model_selection import RandomizedSearchCV
from incremental_trees.models.classification.streaming_extc import StreamingEXTC
-from incremental_trees.trees import StreamingRFC
+from incremental_trees.models.classification.streaming_rfc import StreamingRFC
from tests.common.data_fixture import DataFixture
from tests.common.param_fixtures import RFCGRID, SRFCGRID
class GridBenchmarks:
def test_fit_all(self):
- """
- Fit grids and compare.
-
- TODO: Generalise naming.
- """
+ """Fit grids and compare."""
with warnings.catch_warnings():
warnings.simplefilter('ignore', FutureWarning)
diff --git a/tests/requirements.txt b/tests/requirements.txt
new file mode 100644
index 0000000..e079f8a
--- /dev/null
+++ b/tests/requirements.txt
@@ -0,0 +1 @@
+pytest