From 967b10a5599231631199eb8c506d46e4cab6b78c Mon Sep 17 00:00:00 2001
From: xdssio <jonathan@xdss.io>
Date: Mon, 29 Nov 2021 14:50:13 +0100
Subject: [PATCH 01/11] Add VowpalWabbitModel wrapper and tests

---
 packages/vaex-ml/vaex/ml/vowpalwabbit.py | 149 +++++++++++++++++++++++
 tests/ml/vowpalwabbit_test.py            |  50 ++++++++
 2 files changed, 199 insertions(+)
 create mode 100644 packages/vaex-ml/vaex/ml/vowpalwabbit.py
 create mode 100644 tests/ml/vowpalwabbit_test.py

diff --git a/packages/vaex-ml/vaex/ml/vowpalwabbit.py b/packages/vaex-ml/vaex/ml/vowpalwabbit.py
new file mode 100644
index 0000000000..023ef05a86
--- /dev/null
+++ b/packages/vaex-ml/vaex/ml/vowpalwabbit.py
@@ -0,0 +1,149 @@
+import base64
+import tempfile
+
+import numpy as np
+import pandas as pd
+import traitlets
+import vaex.serialize
+from sklearn.utils import shuffle
+from vowpalwabbit.DFtoVW import DFtoVW
+from vowpalwabbit.pyvw import vw
+
+from . import generate
+from . import state
+
+
+@vaex.serialize.register
+@generate.register
+class VowpalWabbitModel(state.HasState):
+    '''The Vowpal Wabbit algorithm.
+
+    This class provides an interface to the Vowpal Wabbit package.
+
+    Vowpal Wabbit provides fast, efficient, and flexible online machine learning
+    techniques for reinforcement learning, supervised learning, and more.
+    It is influenced by an ecosystem of community contributions, academic research, and proven algorithms.
+    Microsoft Research is a major contributor to Vowpal Wabbit.
+
+    For more information, please visit https://vowpalwabbit.org/index.html.
+
+    Example:
+
+    >>> import vaex.ml
+    >>> import vaex.ml.vowpalwabbit
+    >>> df = vaex.ml.datasets.load_iris()
+    >>> df['class_'] = df['class_']+1 # Vowpal Wabbit classification is an int stareting from 1.
+    >>> features = ['sepal_width', 'petal_length', 'sepal_length', 'petal_width']
+    >>> df_train, df_test = df.ml.train_test_split()
+    >>> params = { 'oaa': '3', 'P': 1}
+    >>> booster = vaex.ml.vowpalwabbit.VowpalWabbitModel(features=features, target='class_', epochs=100, params=params)
+    >>> booster.fit(df_train)
+    >>> df_train = booster.transform(df_train)
+    >>> df_train.head(3)
+     #    sepal_width    petal_length    sepal_length    petal_width    class_    vowpalwabbit_prediction
+     0            3               4.5             5.4            1.5         2    2
+     1            3.4             1.6             4.8            0.2         1    1
+     2            3.1             4.9             6.9            1.5         2    2
+    >>> df_test = booster.transform(df_test)
+    >>> df_test.head(3)
+     #    sepal_width    petal_length    sepal_length    petal_width    class_    vowpalwabbit_prediction
+     0            3               4.2             5.9            1.5         2    2
+     1            3               4.6             6.1            1.4         2    2
+     2            2.9             4.6             6.6            1.3         2    2
+    '''
+    snake_name = 'vowpalwabbit_model'
+    features = traitlets.List(traitlets.Unicode(), help='List of features to use when fitting the Vowpal Wabbit.')
+    target = traitlets.Unicode(allow_none=False, help='The name of the target column.')
+    passes = traitlets.CInt(help='Number of iterations.')
+    params = traitlets.Dict(default_value={}, help='parameters to be passed on the to the Vowpal Wabbit model.')
+    prediction_name = traitlets.Unicode(default_value='vowpalwabbit_prediction',
+                                        help='The name of the virtual column housing the predictions.')
+
+    def __call__(self, *args):
+        data2d = np.array(args).T
+        return self.predict(data2d)
+
+    def transform(self, df):
+        '''Transform a DataFrame such that it contains the predictions of the
+        Vowpal Wabbit in form of a virtual column.
+
+        :param df: A vaex DataFrame.
+
+        :return copy: A shallow copy of the DataFrame that includes the Vowpal Wabbit prediction as a virtual column.
+        :rtype: DataFrame
+        '''
+        copy = df.copy()
+        lazy_function = copy.add_function('vowpalwabbit_prediction_function', self, unique=True)
+        expression = lazy_function(*self.features)
+        copy.add_virtual_column(self.prediction_name, expression, unique=False)
+        return copy
+
+    def fit(self, df, passes=1, chunk_size=500, partial_fit=False):
+        """Fit the VowpalWabbitModel to the DataFrame.
+        :param df: A vaex DataFrame containing the features and target on which to train the model.
+        :param int passes: Number of passes over the data
+        :param int chunk_size: Size of chunks to iterate
+        """
+        passes = passes or self.passes
+        params = {k: v for k, v in self.params.items() if v is not None}
+        target = self.target
+        features = self.features
+        model = self.model if (hasattr(self, 'model') and self.model is not None and partial_fit) else vw(**self.params)
+        for n in range(passes):
+            for _, _, X in df.to_pandas_df(chunk_size=chunk_size):
+                if n > 1:
+                    X = shuffle(X)
+                for ex in DFtoVW.from_colnames(df=X, y=target, x=features).convert_df():
+                    model.learn(ex)
+        self.model = model
+        return self
+
+    def predict(self, df, **kwargs):
+        '''Get an in-memory numpy array with the predictions of the VowpalWabbitModel on a vaex DataFrame.
+        This method accepts the key word arguments of the predict method from VowpalWabbit.
+
+        :param df: A vaex DataFrame.
+
+        :returns: A in-memory numpy array containing the VowpalWabbitModel predictions.
+        :rtype: numpy.array
+        '''
+        if self.model is None:
+            raise RuntimeError("model is not fitted")
+        X = pd.DataFrame(df, columns=self.features) if isinstance(df, np.ndarray) else df[
+            self.features].to_pandas_df()
+        X[self.target] = 1  # DFtoVW.from_colnames issue - will be ignored in predictions
+        examples = DFtoVW.from_colnames(df=X, y=self.target, x=self.features).convert_df()
+        return np.array([self.model.predict(ex) for ex in examples])
+
+    def _encode_vw(self):
+        if self.model is None:
+            return None
+        if isinstance(self.model, bytes):
+            return self.model
+        filename = tempfile.mktemp()
+        self.model.save(filename)
+        with open(filename, 'rb') as f:
+            model_data = f.read()
+        return base64.encodebytes(model_data).decode('ascii')
+
+    def _decode_vw(self, encoding):
+        if encoding is None:
+            return vw(**self.params)
+        if isinstance(encoding, str):
+            model_data = base64.decodebytes(encoding.encode('ascii'))
+            openfilename = tempfile.mktemp()
+            with open(openfilename, 'wb') as f:
+                f.write(model_data)
+            params = self.params.copy()
+            params['i'] = openfilename
+            return vw(**params)
+        else:
+            return encoding
+
+    def state_get(self):
+        return dict(model_state=self._encode_vw(),
+                    substate=super(VowpalWabbitModel, self).state_get())
+
+    def state_set(self, state, trusted=True):
+        super(VowpalWabbitModel, self).state_set(state['substate'])
+        self.model = self._decode_vw(state['model_state'])
diff --git a/tests/ml/vowpalwabbit_test.py b/tests/ml/vowpalwabbit_test.py
new file mode 100644
index 0000000000..2885a51ae3
--- /dev/null
+++ b/tests/ml/vowpalwabbit_test.py
@@ -0,0 +1,50 @@
+import sys
+
+import pytest
+
+pytest.importorskip("vowpalwabbit")
+from sklearn.metrics import accuracy_score
+import vaex.ml.vowpalwabbit
+import vaex.ml.datasets
+
+params = {'oaa': '3', 'P': 1, 'enable_logging': True}
+
+
+@pytest.mark.skipif(sys.version_info < (3, 6), reason="requires python3.6 or higher")
+def test_vowpalwabbit(df_iris):
+    ds = df_iris
+
+    ds['class_'] = ds['class_'] + 1  # VW classification starts from 1
+    ds['x'] = ds.sepal_length * 1
+    ds['y'] = ds.sepal_width * 1
+    ds['w'] = ds.petal_length * 1
+    ds['z'] = ds.petal_width * 1
+    ds_train, ds_test = ds.ml.train_test_split(test_size=0.2, verbose=False)
+    features = ['x', 'y', 'z', 'w']
+
+    params = {'oaa': '3', 'P': 1, 'link': 'logistic', 'enable_logging': True}
+    model = vaex.ml.vowpalwabbit.VowpalWabbitModel(
+        params=params,
+        features=features,
+        target='class_')
+    model.fit(ds_train)
+    assert 0 < accuracy_score(ds_test.col.class_.values, model.predict(ds_test))
+
+    ds_train = model.transform(ds_train)  # this will add the lightgbm_prediction column
+    state = ds_train.state_get()
+    ds_test.state_set(state)
+
+
+@pytest.mark.skipif(sys.version_info < (3, 6), reason="requires python3.6 or higher")
+def test_vowpalwabbit_serialize(tmpdir, df_iris):
+    ds = df_iris
+    ds['class_'] = ds['class_'] + 1  # VW classification starts from 1
+    features = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']
+    model = vaex.ml.vowpalwabbit.VowpalWabbitModel(
+        params=params,
+        features=features,
+        target='class_')
+    model.fit(ds)
+    pl = vaex.ml.Pipeline([model])
+    pl.save(str(tmpdir.join('test.json')))
+    pl.load(str(tmpdir.join('test.json')))

From fd489ce22f3be77ae35909dd8fbcc532bbe48115 Mon Sep 17 00:00:00 2001
From: xdssio <jonathan@xdss.io>
Date: Mon, 29 Nov 2021 14:59:26 +0100
Subject: [PATCH 02/11] tests: fit VowpalWabbitModel with 5 passes, fix stale comment

---
 tests/ml/vowpalwabbit_test.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/ml/vowpalwabbit_test.py b/tests/ml/vowpalwabbit_test.py
index 2885a51ae3..877d527ce1 100644
--- a/tests/ml/vowpalwabbit_test.py
+++ b/tests/ml/vowpalwabbit_test.py
@@ -27,10 +27,10 @@ def test_vowpalwabbit(df_iris):
         params=params,
         features=features,
         target='class_')
-    model.fit(ds_train)
+    model.fit(ds_train, passes=5)
     assert 0 < accuracy_score(ds_test.col.class_.values, model.predict(ds_test))
 
-    ds_train = model.transform(ds_train)  # this will add the lightgbm_prediction column
+    ds_train = model.transform(ds_train)  # this will add the vw column
     state = ds_train.state_get()
     ds_test.state_set(state)
 

From 320c9cadb3f9858a9367d79c112c74fddeb18426 Mon Sep 17 00:00:00 2001
From: xdssio <37710064+xdssio@users.noreply.github.com>
Date: Sun, 12 Dec 2021 19:10:42 +0100
Subject: [PATCH 03/11] Update packages/vaex-ml/vaex/ml/vowpalwabbit.py

PEP 8 clean-up

Co-authored-by: Jovan Veljanoski <jovan.veljanoski@gmail.com>
---
 packages/vaex-ml/vaex/ml/vowpalwabbit.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/packages/vaex-ml/vaex/ml/vowpalwabbit.py b/packages/vaex-ml/vaex/ml/vowpalwabbit.py
index 023ef05a86..4e0ba9047c 100644
--- a/packages/vaex-ml/vaex/ml/vowpalwabbit.py
+++ b/packages/vaex-ml/vaex/ml/vowpalwabbit.py
@@ -35,7 +35,7 @@ class VowpalWabbitModel(state.HasState):
     >>> df['class_'] = df['class_']+1 # Vowpal Wabbit classification is an int stareting from 1.
     >>> features = ['sepal_width', 'petal_length', 'sepal_length', 'petal_width']
     >>> df_train, df_test = df.ml.train_test_split()
-    >>> params = { 'oaa': '3', 'P': 1}
+    >>> params = {'oaa': '3', 'P': 1}
     >>> booster = vaex.ml.vowpalwabbit.VowpalWabbitModel(features=features, target='class_', epochs=100, params=params)
     >>> booster.fit(df_train)
     >>> df_train = booster.transform(df_train)

From 0f15cafa3178aeb3e696ef89e0f861835984cb8e Mon Sep 17 00:00:00 2001
From: xdssio <37710064+xdssio@users.noreply.github.com>
Date: Sun, 12 Dec 2021 19:11:03 +0100
Subject: [PATCH 04/11] Update packages/vaex-ml/vaex/ml/vowpalwabbit.py

rename-fix

Co-authored-by: Jovan Veljanoski <jovan.veljanoski@gmail.com>
---
 packages/vaex-ml/vaex/ml/vowpalwabbit.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/packages/vaex-ml/vaex/ml/vowpalwabbit.py b/packages/vaex-ml/vaex/ml/vowpalwabbit.py
index 4e0ba9047c..232cdda9d1 100644
--- a/packages/vaex-ml/vaex/ml/vowpalwabbit.py
+++ b/packages/vaex-ml/vaex/ml/vowpalwabbit.py
@@ -37,7 +37,7 @@ class VowpalWabbitModel(state.HasState):
     >>> df_train, df_test = df.ml.train_test_split()
     >>> params = {'oaa': '3', 'P': 1}
     >>> booster = vaex.ml.vowpalwabbit.VowpalWabbitModel(features=features, target='class_', epochs=100, params=params)
-    >>> booster.fit(df_train)
+    >>> vw_model.fit(df_train)
     >>> df_train = booster.transform(df_train)
     >>> df_train.head(3)
      #    sepal_width    petal_length    sepal_length    petal_width    class_    vowpalwabbit_prediction

From 7c3dfc5027e9e9b6a2eb59433610f63bee2b5a6b Mon Sep 17 00:00:00 2001
From: xdssio <37710064+xdssio@users.noreply.github.com>
Date: Sun, 12 Dec 2021 19:11:16 +0100
Subject: [PATCH 05/11] Update packages/vaex-ml/vaex/ml/vowpalwabbit.py

Co-authored-by: Jovan Veljanoski <jovan.veljanoski@gmail.com>
---
 packages/vaex-ml/vaex/ml/vowpalwabbit.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/packages/vaex-ml/vaex/ml/vowpalwabbit.py b/packages/vaex-ml/vaex/ml/vowpalwabbit.py
index 232cdda9d1..2b5a1fadc2 100644
--- a/packages/vaex-ml/vaex/ml/vowpalwabbit.py
+++ b/packages/vaex-ml/vaex/ml/vowpalwabbit.py
@@ -38,7 +38,7 @@ class VowpalWabbitModel(state.HasState):
     >>> params = {'oaa': '3', 'P': 1}
     >>> booster = vaex.ml.vowpalwabbit.VowpalWabbitModel(features=features, target='class_', epochs=100, params=params)
     >>> vw_model.fit(df_train)
-    >>> df_train = booster.transform(df_train)
+    >>> df_train = vw_model.transform(df_train)
     >>> df_train.head(3)
      #    sepal_width    petal_length    sepal_length    petal_width    class_    vowpalwabbit_prediction
      0            3               4.5             5.4            1.5         2    2

From a7efd24d3dd322d4782e5e71a81e07d01be31da2 Mon Sep 17 00:00:00 2001
From: xdssio <37710064+xdssio@users.noreply.github.com>
Date: Sun, 12 Dec 2021 19:11:24 +0100
Subject: [PATCH 06/11] Update packages/vaex-ml/vaex/ml/vowpalwabbit.py

Co-authored-by: Jovan Veljanoski <jovan.veljanoski@gmail.com>
---
 packages/vaex-ml/vaex/ml/vowpalwabbit.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/packages/vaex-ml/vaex/ml/vowpalwabbit.py b/packages/vaex-ml/vaex/ml/vowpalwabbit.py
index 2b5a1fadc2..d95dca2a3b 100644
--- a/packages/vaex-ml/vaex/ml/vowpalwabbit.py
+++ b/packages/vaex-ml/vaex/ml/vowpalwabbit.py
@@ -44,7 +44,7 @@ class VowpalWabbitModel(state.HasState):
      0            3               4.5             5.4            1.5         2    2
      1            3.4             1.6             4.8            0.2         1    1
      2            3.1             4.9             6.9            1.5         2    2
-    >>> df_test = booster.transform(df_test)
+    >>> df_test = vw_model.transform(df_test)
     >>> df_test.head(3)
      #    sepal_width    petal_length    sepal_length    petal_width    class_    vowpalwabbit_prediction
      0            3               4.2             5.9            1.5         2    2

From ffb174f058b21ebfba9735cf9aa1fcfdf2f199fb Mon Sep 17 00:00:00 2001
From: xdssio <37710064+xdssio@users.noreply.github.com>
Date: Sun, 12 Dec 2021 19:11:37 +0100
Subject: [PATCH 07/11] Update packages/vaex-ml/vaex/ml/vowpalwabbit.py

Co-authored-by: Jovan Veljanoski <jovan.veljanoski@gmail.com>
---
 packages/vaex-ml/vaex/ml/vowpalwabbit.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/packages/vaex-ml/vaex/ml/vowpalwabbit.py b/packages/vaex-ml/vaex/ml/vowpalwabbit.py
index d95dca2a3b..7da969e581 100644
--- a/packages/vaex-ml/vaex/ml/vowpalwabbit.py
+++ b/packages/vaex-ml/vaex/ml/vowpalwabbit.py
@@ -32,7 +32,7 @@ class VowpalWabbitModel(state.HasState):
     >>> import vaex.ml
     >>> import vaex.ml.vowpalwabbit
     >>> df = vaex.ml.datasets.load_iris()
-    >>> df['class_'] = df['class_']+1 # Vowpal Wabbit classification is an int stareting from 1.
+    >>> df['class_'] = df['class_']+1 # Vowpal Wabbit classification target should be an int starting from 1.
     >>> features = ['sepal_width', 'petal_length', 'sepal_length', 'petal_width']
     >>> df_train, df_test = df.ml.train_test_split()
     >>> params = {'oaa': '3', 'P': 1}

From fe9aceea74b904f868fcd846217f47d591a0052d Mon Sep 17 00:00:00 2001
From: xdssio <jonathan@xdss.io>
Date: Wed, 22 Dec 2021 16:13:38 +0100
Subject: [PATCH 08/11] implement renaming multiple columns with a map

---
 packages/vaex-core/vaex/dataframe.py | 14 ++++++++++++++
 tests/rename_test.py                 | 12 ++++++++++++
 2 files changed, 26 insertions(+)

diff --git a/packages/vaex-core/vaex/dataframe.py b/packages/vaex-core/vaex/dataframe.py
index eebdb56904..d2fc814892 100644
--- a/packages/vaex-core/vaex/dataframe.py
+++ b/packages/vaex-core/vaex/dataframe.py
@@ -3664,6 +3664,20 @@ def add_virtual_column(self, name, expression, unique=False):
         self._save_assign_expression(valid_name)
         self.signal_column_changed.emit(self, valid_name, "add")
 
+    def renames(self, names, unique=False):
+        """Renames a column or variable, and rewrite expressions such that they refer to the new name"""
+        columns = self.get_column_names()
+        ret = []
+        for name, new_name in names.items():
+            # Entries that map a name onto itself, or whose key is not among the
+            # names returned by get_column_names(), are skipped without an error.
+            if name == new_name or name not in columns:
+                continue
+            new_name = vaex.utils.find_valid_name(new_name, used=None if not unique else self.get_column_names(hidden=True))
+            self._rename(name, new_name, rename_meta_data=True)
+            ret.append(new_name)
+        return ret
+
     def rename(self, name, new_name, unique=False):
         """Renames a column or variable, and rewrite expressions such that they refer to the new name"""
         if name == new_name:
diff --git a/tests/rename_test.py b/tests/rename_test.py
index 066369f7e0..70c4a060f9 100644
--- a/tests/rename_test.py
+++ b/tests/rename_test.py
@@ -1,5 +1,17 @@
 from common import *
 
+
+def test_renames(df_local):
+    ds = df_local
+    new_columns = ds.renames({'x': 'x1', 'y': 'y1'})
+    assert new_columns == ['x1', 'y1']
+    current_columns = ds.get_column_names()
+    for column in new_columns:
+        assert column in current_columns
+    for column in ['x', 'y']:
+        assert column not in current_columns
+
+
 def test_rename(ds_filtered):
     ds = ds_filtered
     ds['r'] = ds.x

From a0f01e22b046c1721d8e34e71daf77c52c1bf1cb Mon Sep 17 00:00:00 2001
From: xdssio <37710064+xdssio@users.noreply.github.com>
Date: Wed, 22 Dec 2021 16:31:43 +0100
Subject: [PATCH 09/11] Delete vowpalwabbit_test.py

Unrelated to the rename changes in this series.
---
 tests/ml/vowpalwabbit_test.py | 50 -----------------------------------
 1 file changed, 50 deletions(-)
 delete mode 100644 tests/ml/vowpalwabbit_test.py

diff --git a/tests/ml/vowpalwabbit_test.py b/tests/ml/vowpalwabbit_test.py
deleted file mode 100644
index 877d527ce1..0000000000
--- a/tests/ml/vowpalwabbit_test.py
+++ /dev/null
@@ -1,50 +0,0 @@
-import sys
-
-import pytest
-
-pytest.importorskip("vowpalwabbit")
-from sklearn.metrics import accuracy_score
-import vaex.ml.vowpalwabbit
-import vaex.ml.datasets
-
-params = {'oaa': '3', 'P': 1, 'enable_logging': True}
-
-
-@pytest.mark.skipif(sys.version_info < (3, 6), reason="requires python3.6 or higher")
-def test_vowpalwabbit(df_iris):
-    ds = df_iris
-
-    ds['class_'] = ds['class_'] + 1  # VW classification starts from 1
-    ds['x'] = ds.sepal_length * 1
-    ds['y'] = ds.sepal_width * 1
-    ds['w'] = ds.petal_length * 1
-    ds['z'] = ds.petal_width * 1
-    ds_train, ds_test = ds.ml.train_test_split(test_size=0.2, verbose=False)
-    features = ['x', 'y', 'z', 'w']
-
-    params = {'oaa': '3', 'P': 1, 'link': 'logistic', 'enable_logging': True}
-    model = vaex.ml.vowpalwabbit.VowpalWabbitModel(
-        params=params,
-        features=features,
-        target='class_')
-    model.fit(ds_train, passes=5)
-    assert 0 < accuracy_score(ds_test.col.class_.values, model.predict(ds_test))
-
-    ds_train = model.transform(ds_train)  # this will add the vw column
-    state = ds_train.state_get()
-    ds_test.state_set(state)
-
-
-@pytest.mark.skipif(sys.version_info < (3, 6), reason="requires python3.6 or higher")
-def test_vowpalwabbit_serialize(tmpdir, df_iris):
-    ds = df_iris
-    ds['class_'] = ds['class_'] + 1  # VW classification starts from 1
-    features = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']
-    model = vaex.ml.vowpalwabbit.VowpalWabbitModel(
-        params=params,
-        features=features,
-        target='class_')
-    model.fit(ds)
-    pl = vaex.ml.Pipeline([model])
-    pl.save(str(tmpdir.join('test.json')))
-    pl.load(str(tmpdir.join('test.json')))

From 7fa267a547067baab7a896d77820cbabb925449b Mon Sep 17 00:00:00 2001
From: xdssio <37710064+xdssio@users.noreply.github.com>
Date: Wed, 22 Dec 2021 16:32:00 +0100
Subject: [PATCH 10/11] Delete vowpalwabbit.py

Unrelated to the rename changes in this series.
---
 packages/vaex-ml/vaex/ml/vowpalwabbit.py | 149 -----------------------
 1 file changed, 149 deletions(-)
 delete mode 100644 packages/vaex-ml/vaex/ml/vowpalwabbit.py

diff --git a/packages/vaex-ml/vaex/ml/vowpalwabbit.py b/packages/vaex-ml/vaex/ml/vowpalwabbit.py
deleted file mode 100644
index 7da969e581..0000000000
--- a/packages/vaex-ml/vaex/ml/vowpalwabbit.py
+++ /dev/null
@@ -1,149 +0,0 @@
-import base64
-import tempfile
-
-import numpy as np
-import pandas as pd
-import traitlets
-import vaex.serialize
-from sklearn.utils import shuffle
-from vowpalwabbit.DFtoVW import DFtoVW
-from vowpalwabbit.pyvw import vw
-
-from . import generate
-from . import state
-
-
-@vaex.serialize.register
-@generate.register
-class VowpalWabbitModel(state.HasState):
-    '''The Vowpal Wabbit algorithm.
-
-    This class provides an interface to the Vowpal Wabbit package.
-
-    Vowpal Wabbit provides fast, efficient, and flexible online machine learning
-    techniques for reinforcement learning, supervised learning, and more.
-    It is influenced by an ecosystem of community contributions, academic research, and proven algorithms.
-    Microsoft Research is a major contributor to Vowpal Wabbit.
-
-    For more information, please visit https://vowpalwabbit.org/index.html.
-
-    Example:
-
-    >>> import vaex.ml
-    >>> import vaex.ml.vowpalwabbit
-    >>> df = vaex.ml.datasets.load_iris()
-    >>> df['class_'] = df['class_']+1 # Vowpal Wabbit classification target should be an int starting from 1.
-    >>> features = ['sepal_width', 'petal_length', 'sepal_length', 'petal_width']
-    >>> df_train, df_test = df.ml.train_test_split()
-    >>> params = {'oaa': '3', 'P': 1}
-    >>> booster = vaex.ml.vowpalwabbit.VowpalWabbitModel(features=features, target='class_', epochs=100, params=params)
-    >>> vw_model.fit(df_train)
-    >>> df_train = vw_model.transform(df_train)
-    >>> df_train.head(3)
-     #    sepal_width    petal_length    sepal_length    petal_width    class_    vowpalwabbit_prediction
-     0            3               4.5             5.4            1.5         2    2
-     1            3.4             1.6             4.8            0.2         1    1
-     2            3.1             4.9             6.9            1.5         2    2
-    >>> df_test = vw_model.transform(df_test)
-    >>> df_test.head(3)
-     #    sepal_width    petal_length    sepal_length    petal_width    class_    vowpalwabbit_prediction
-     0            3               4.2             5.9            1.5         2    2
-     1            3               4.6             6.1            1.4         2    2
-     2            2.9             4.6             6.6            1.3         2    2
-    '''
-    snake_name = 'vowpalwabbit_model'
-    features = traitlets.List(traitlets.Unicode(), help='List of features to use when fitting the Vowpal Wabbit.')
-    target = traitlets.Unicode(allow_none=False, help='The name of the target column.')
-    passes = traitlets.CInt(help='Number of iterations.')
-    params = traitlets.Dict(default_value={}, help='parameters to be passed on the to the Vowpal Wabbit model.')
-    prediction_name = traitlets.Unicode(default_value='vowpalwabbit_prediction',
-                                        help='The name of the virtual column housing the predictions.')
-
-    def __call__(self, *args):
-        data2d = np.array(args).T
-        return self.predict(data2d)
-
-    def transform(self, df):
-        '''Transform a DataFrame such that it contains the predictions of the
-        Vowpal Wabbit in form of a virtual column.
-
-        :param df: A vaex DataFrame.
-
-        :return copy: A shallow copy of the DataFrame that includes the Vowpal Wabbit prediction as a virtual column.
-        :rtype: DataFrame
-        '''
-        copy = df.copy()
-        lazy_function = copy.add_function('vowpalwabbit_prediction_function', self, unique=True)
-        expression = lazy_function(*self.features)
-        copy.add_virtual_column(self.prediction_name, expression, unique=False)
-        return copy
-
-    def fit(self, df, passes=1, chunk_size=500, partial_fit=False):
-        """Fit the VowpalWabbitModel to the DataFrame.
-        :param df: A vaex DataFrame containing the features and target on which to train the model.
-        :param int passes: Number of passes over the data
-        :param int chunk_size: Size of chunks to iterate
-        """
-        passes = passes or self.passes
-        params = {k: v for k, v in self.params.items() if v is not None}
-        target = self.target
-        features = self.features
-        model = self.model if (hasattr(self, 'model') and self.model is not None and partial_fit) else vw(**self.params)
-        for n in range(passes):
-            for _, _, X in df.to_pandas_df(chunk_size=chunk_size):
-                if n > 1:
-                    X = shuffle(X)
-                for ex in DFtoVW.from_colnames(df=X, y=target, x=features).convert_df():
-                    model.learn(ex)
-        self.model = model
-        return self
-
-    def predict(self, df, **kwargs):
-        '''Get an in-memory numpy array with the predictions of the VowpalWabbitModel on a vaex DataFrame.
-        This method accepts the key word arguments of the predict method from VowpalWabbit.
-
-        :param df: A vaex DataFrame.
-
-        :returns: A in-memory numpy array containing the VowpalWabbitModel predictions.
-        :rtype: numpy.array
-        '''
-        if self.model is None:
-            raise RuntimeError("model is not fitted")
-        X = pd.DataFrame(df, columns=self.features) if isinstance(df, np.ndarray) else df[
-            self.features].to_pandas_df()
-        X[self.target] = 1  # DFtoVW.from_colnames issue - will be ignored in predictions
-        examples = DFtoVW.from_colnames(df=X, y=self.target, x=self.features).convert_df()
-        return np.array([self.model.predict(ex) for ex in examples])
-
-    def _encode_vw(self):
-        if self.model is None:
-            return None
-        if isinstance(self.model, bytes):
-            return self.model
-        filename = tempfile.mktemp()
-        self.model.save(filename)
-        with open(filename, 'rb') as f:
-            model_data = f.read()
-        return base64.encodebytes(model_data).decode('ascii')
-
-    def _decode_vw(self, encoding):
-        if encoding is None:
-            return vw(**self.params)
-        if isinstance(encoding, str):
-            model_data = base64.decodebytes(encoding.encode('ascii'))
-            openfilename = tempfile.mktemp()
-            with open(openfilename, 'wb') as f:
-                f.write(model_data)
-            params = self.params.copy()
-            params['i'] = openfilename
-            return vw(**params)
-        else:
-            return encoding
-
-    def state_get(self):
-        return dict(model_state=self._encode_vw(),
-                    substate=super(VowpalWabbitModel, self).state_get())
-
-    def state_set(self, state, trusted=True):
-        super(VowpalWabbitModel, self).state_set(state['substate'])
-        self.model = self._decode_vw(state['model_state'])

From 768dd65bd9c76232f8beccc157b0cdc2ee3fd629 Mon Sep 17 00:00:00 2001
From: xdssio <jonathan@xdss.io>
Date: Tue, 4 Jan 2022 13:40:45 +0100
Subject: [PATCH 11/11] rename accepts a dict

---
 packages/vaex-core/vaex/dataframe.py | 6 ++++--
 tests/rename_test.py                 | 2 +-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/packages/vaex-core/vaex/dataframe.py b/packages/vaex-core/vaex/dataframe.py
index d2fc814892..4b7de7666b 100644
--- a/packages/vaex-core/vaex/dataframe.py
+++ b/packages/vaex-core/vaex/dataframe.py
@@ -3664,7 +3664,7 @@ def add_virtual_column(self, name, expression, unique=False):
         self._save_assign_expression(valid_name)
         self.signal_column_changed.emit(self, valid_name, "add")
 
-    def renames(self, names, unique=False):
+    def _renames(self, names, unique=False):
         """Renames a column or variable, and rewrite expressions such that they refer to the new name"""
         columns = self.get_column_names()
         ret = []
@@ -3678,8 +3678,10 @@ def renames(self, names, unique=False):
             ret.append(new_name)
         return ret
 
-    def rename(self, name, new_name, unique=False):
+    def rename(self, name, new_name=None, unique=False):
         """Renames a column or variable, and rewrite expressions such that they refer to the new name"""
+        if isinstance(name, dict):
+            return self._renames(name, unique=unique)
         if name == new_name:
             return
         new_name = vaex.utils.find_valid_name(new_name, used=None if not unique else self.get_column_names(hidden=True))
diff --git a/tests/rename_test.py b/tests/rename_test.py
index 70c4a060f9..4ccec6aa3a 100644
--- a/tests/rename_test.py
+++ b/tests/rename_test.py
@@ -3,7 +3,7 @@
 
 def test_renames(df_local):
     ds = df_local
-    new_columns = ds.renames({'x': 'x1', 'y': 'y1'})
+    new_columns = ds.rename({'x': 'x1', 'y': 'y1'})
     assert new_columns == ['x1', 'y1']
     current_columns = ds.get_column_names()
     for column in new_columns: