From 8097342cfea0a3c29bbb21d6aa1c910c9e379b78 Mon Sep 17 00:00:00 2001
From: "He, Dan H"
Date: Wed, 26 Apr 2023 18:12:57 +0800
Subject: [PATCH] Add PyTorch linear regression example

This adds a new tutorial example on distributing a linear regression task
over an OpenFL cluster. The model is defined in PyTorch and can run on both
CPU (the default) and GPU. The dataset is generated by make_regression from
sklearn.datasets with pre-defined parameters.

Fixes 797

Co-authored-by: Jiang, Jiaqiu
Signed-off-by: He, Dan H
Signed-off-by: Jiang, Jiaqiu
Signed-off-by: Li, Qingqing
Signed-off-by: Wang, Le
Signed-off-by: Wu, Caili
---
 .../PyTorch_LinearRegression/README.md        |  56 +++
 .../director/director_config.yaml             |   6 +
 .../director/start_director.sh                |   4 +
 .../envoy/envoy_config.yaml                   |   9 +
 .../envoy/regression_shard_descriptor.py      |  71 ++++
 .../envoy/requirements.txt                    |   7 +
 .../envoy/start_envoy.sh                      |   6 +
 .../workspace/requirements.txt                |   7 +
 .../workspace/torch_linear_regression.ipynb   | 388 ++++++++++++++++++
 9 files changed, 554 insertions(+)
 create mode 100644 openfl-tutorials/interactive_api/PyTorch_LinearRegression/README.md
 create mode 100644 openfl-tutorials/interactive_api/PyTorch_LinearRegression/director/director_config.yaml
 create mode 100755 openfl-tutorials/interactive_api/PyTorch_LinearRegression/director/start_director.sh
 create mode 100644 openfl-tutorials/interactive_api/PyTorch_LinearRegression/envoy/envoy_config.yaml
 create mode 100644 openfl-tutorials/interactive_api/PyTorch_LinearRegression/envoy/regression_shard_descriptor.py
 create mode 100644 openfl-tutorials/interactive_api/PyTorch_LinearRegression/envoy/requirements.txt
 create mode 100755 openfl-tutorials/interactive_api/PyTorch_LinearRegression/envoy/start_envoy.sh
 create mode 100644 openfl-tutorials/interactive_api/PyTorch_LinearRegression/workspace/requirements.txt
 create mode 100644 openfl-tutorials/interactive_api/PyTorch_LinearRegression/workspace/torch_linear_regression.ipynb

diff --git a/openfl-tutorials/interactive_api/PyTorch_LinearRegression/README.md b/openfl-tutorials/interactive_api/PyTorch_LinearRegression/README.md
new file mode 100644
index 00000000000..84c75e5369c
--- /dev/null
+++ b/openfl-tutorials/interactive_api/PyTorch_LinearRegression/README.md
@@ -0,0 +1,56 @@
+# PyTorch based Linear Regression Tutorial
+
+### 1. About dataset
+
+Generate a random regression problem using `make_regression` from sklearn.datasets with pre-defined parameters (see the reference snippet at the end of this README).
+
+Define the below param in the `envoy_config.yaml` file to shard the dataset across participants/envoys:
+- rank_worldsize
+
+
+### 2. About model
+
+A simple linear regression model implemented in PyTorch.
+
+
+### 3. How to run this tutorial (without TLS and locally as a simulation):
+
+1. Run director:
+
+```sh
+cd director_folder
+./start_director.sh
+```
+
+2. Run envoy:
+
+Step 1: Activate the virtual environment and install packages
+```sh
+cd envoy_folder
+pip install -r requirements.txt
+```
+Step 2: Start the envoy
+```sh
+./start_envoy.sh env_instance_1 envoy_config.yaml
+```
+
+Optional: start a second envoy:
+
+- Copy `envoy_folder` to another place and follow the same process as above:
+
+```sh
+./start_envoy.sh env_instance_2 envoy_config.yaml
+```
+
+3. Run the `torch_linear_regression.ipynb` jupyter notebook:
+
+```sh
+cd workspace
+jupyter lab torch_linear_regression.ipynb
+```
+
+4. Visualization:
+
+```sh
+tensorboard --logdir logs/
+```
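+
+### 4. Dataset generation reference
+
+For reference, a minimal sketch of the data each envoy trains on. The `make_regression` and `train_test_split` parameters below mirror the ones hard-coded in `envoy/regression_shard_descriptor.py`; the slicing at the end is only an illustration of how `rank_worldsize` shards the samples (shown here for the first of two envoys).
+
+```python
+import numpy as np
+from sklearn.datasets import make_regression
+from sklearn.model_selection import train_test_split
+
+# Same parameters as RegressionShardDescriptor.generate_data()
+x, y = make_regression(n_samples=1000, n_features=1, noise=14, random_state=24)
+X_train, X_test, y_train, y_test = train_test_split(x, y, random_state=24)
+
+# Each envoy keeps every `worldsize`-th sample, offset by its (rank - 1),
+# e.g. `rank_worldsize: 1, 2` keeps samples 0, 2, 4, ... of the train split.
+rank, worldsize = 1, 2
+train_shard = np.concatenate((X_train, y_train[:, None]), axis=1)[rank - 1::worldsize]
+print(train_shard.shape)  # (375, 2): feature in column 0, target in column 1
+```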
diff --git a/openfl-tutorials/interactive_api/PyTorch_LinearRegression/director/director_config.yaml b/openfl-tutorials/interactive_api/PyTorch_LinearRegression/director/director_config.yaml
new file mode 100644
index 00000000000..d22b4b77661
--- /dev/null
+++ b/openfl-tutorials/interactive_api/PyTorch_LinearRegression/director/director_config.yaml
@@ -0,0 +1,6 @@
+settings:
+  listen_host: localhost
+  listen_port: 50050
+  sample_shape: ['1'] # Modify this param if experimenting with `n_features` of shard_descriptor.
+  target_shape: ['1']
+  envoy_health_check_period: 5 # in seconds
diff --git a/openfl-tutorials/interactive_api/PyTorch_LinearRegression/director/start_director.sh b/openfl-tutorials/interactive_api/PyTorch_LinearRegression/director/start_director.sh
new file mode 100755
index 00000000000..5806a6cc0a7
--- /dev/null
+++ b/openfl-tutorials/interactive_api/PyTorch_LinearRegression/director/start_director.sh
@@ -0,0 +1,4 @@
+#!/bin/bash
+set -e
+
+fx director start --disable-tls -c director_config.yaml
\ No newline at end of file
diff --git a/openfl-tutorials/interactive_api/PyTorch_LinearRegression/envoy/envoy_config.yaml b/openfl-tutorials/interactive_api/PyTorch_LinearRegression/envoy/envoy_config.yaml
new file mode 100644
index 00000000000..8f35387ca8d
--- /dev/null
+++ b/openfl-tutorials/interactive_api/PyTorch_LinearRegression/envoy/envoy_config.yaml
@@ -0,0 +1,9 @@
+params:
+  cuda_devices: []
+
+optional_plugin_components: {}
+
+shard_descriptor:
+  template: regression_shard_descriptor.RegressionShardDescriptor
+  params:
+    rank_worldsize: 1, 2
\ No newline at end of file
diff --git a/openfl-tutorials/interactive_api/PyTorch_LinearRegression/envoy/regression_shard_descriptor.py b/openfl-tutorials/interactive_api/PyTorch_LinearRegression/envoy/regression_shard_descriptor.py
new file mode 100644
index 00000000000..6aa35cbae5f
--- /dev/null
+++ b/openfl-tutorials/interactive_api/PyTorch_LinearRegression/envoy/regression_shard_descriptor.py
@@ -0,0 +1,71 @@
+# Copyright (C) 2020-2022 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+"""Regression Shard Descriptor."""
+
+from typing import List
+
+import numpy as np
+import torch
+from sklearn.datasets import make_regression
+from sklearn.model_selection import train_test_split
+
+from openfl.interface.interactive_api.shard_descriptor import ShardDescriptor
+
+
+class RegressionShardDescriptor(ShardDescriptor):
+    """Regression Shard descriptor class."""
+
+    def __init__(self, rank_worldsize: str = '1, 1', **kwargs) -> None:
+        """
+        Initialize Regression Data Shard Descriptor.
+
+        This Shard Descriptor generates random regression data with Gaussian-centered
+        noise using the make_regression method from sklearn.datasets.
+        Shards data across participants using rank and world size.
+        """
+        self.rank, self.worldsize = tuple(int(num) for num in rank_worldsize.split(','))
+        X_train, y_train, X_test, y_test = self.generate_data()
+        self.data_by_type = {
+            'train': np.concatenate((X_train, y_train[:, None]), axis=1),
+            'val': np.concatenate((X_test, y_test[:, None]), axis=1)
+        }
+
+    def generate_data(self):
+        """Generate regression dataset with predefined params."""
+        x, y = make_regression(n_samples=1000, n_features=1, noise=14, random_state=24)
+        X_train, X_test, y_train, y_test = train_test_split(x, y, random_state=24)
+        self.data = np.concatenate((x, y[:, None]), axis=1)
+        return X_train, y_train, X_test, y_test
+
+    def get_shard_dataset_types(self) -> List[str]:
+        """Get available shard dataset types."""
+        return list(self.data_by_type)
+
+    def get_dataset(self, dataset_type='train'):
+        """Return a shard dataset by type."""
+        if dataset_type not in self.data_by_type:
+            raise ValueError(f'Incorrect dataset type: {dataset_type}')
+        shard = self.data_by_type[dataset_type][self.rank - 1::self.worldsize]
+        return torch.tensor(shard, dtype=torch.float32)
+
+    @property
+    def sample_shape(self) -> List[str]:
+        """Return the sample shape info."""
+        (*x, _) = self.data[0]
+        return [str(i) for i in np.array(x, ndmin=1).shape]
+
+    @property
+    def target_shape(self) -> List[str]:
+        """Return the target shape info."""
+        (*_, y) = self.data[0]
+        return [str(i) for i in np.array(y, ndmin=1).shape]
+
+    @property
+    def dataset_description(self) -> str:
+        """Return the dataset description."""
+        return (f'Regression dataset, shard number {self.rank}'
+                f' out of {self.worldsize}')
diff --git a/openfl-tutorials/interactive_api/PyTorch_LinearRegression/envoy/requirements.txt b/openfl-tutorials/interactive_api/PyTorch_LinearRegression/envoy/requirements.txt
new file mode 100644
index 00000000000..0baaf04f874
--- /dev/null
+++ b/openfl-tutorials/interactive_api/PyTorch_LinearRegression/envoy/requirements.txt
@@ -0,0 +1,7 @@
+openfl>=1.2.1
+numpy>=1.13.3
+torch>=1.13.1
+scikit-learn>=0.24.1
+mistune>=2.0.3 # not directly required, pinned by Snyk to avoid a vulnerability
+setuptools>=65.5.1 # not directly required, pinned by Snyk to avoid a vulnerability
+wheel>=0.38.0 # not directly required, pinned by Snyk to avoid a vulnerability
diff --git a/openfl-tutorials/interactive_api/PyTorch_LinearRegression/envoy/start_envoy.sh b/openfl-tutorials/interactive_api/PyTorch_LinearRegression/envoy/start_envoy.sh
new file mode 100755
index 00000000000..4da07821af4
--- /dev/null
+++ b/openfl-tutorials/interactive_api/PyTorch_LinearRegression/envoy/start_envoy.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+set -e
+ENVOY_NAME=$1
+ENVOY_CONF=$2
+
+fx envoy start -n "$ENVOY_NAME" --disable-tls --envoy-config-path "$ENVOY_CONF" -dh localhost -dp 50050
diff --git a/openfl-tutorials/interactive_api/PyTorch_LinearRegression/workspace/requirements.txt b/openfl-tutorials/interactive_api/PyTorch_LinearRegression/workspace/requirements.txt
new file mode 100644
index 00000000000..f2177597843
--- /dev/null
+++ b/openfl-tutorials/interactive_api/PyTorch_LinearRegression/workspace/requirements.txt
@@ -0,0 +1,7 @@
+openfl>=1.2.1
+numpy>=1.13.3
+torch>=1.13.1
+scikit-learn>=0.24.1
+jupyterlab
+setuptools>=65.5.1 # not directly required, pinned by Snyk to avoid a vulnerability
+wheel>=0.38.0 # not directly required, pinned by Snyk to avoid a vulnerability
diff --git a/openfl-tutorials/interactive_api/PyTorch_LinearRegression/workspace/torch_linear_regression.ipynb b/openfl-tutorials/interactive_api/PyTorch_LinearRegression/workspace/torch_linear_regression.ipynb
new file mode 100644
index 00000000000..d7ad3e310f6
--- /dev/null
+++ b/openfl-tutorials/interactive_api/PyTorch_LinearRegression/workspace/torch_linear_regression.ipynb
@@ -0,0 +1,388 @@
+{
+ "cells": [
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Torch Regression Example - Interactive API"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import torch\n",
+    "import torch.nn as nn\n",
+    "import torch.optim as optim"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "NUM_FEATURES = 1\n",
+    "LEARNING_RATE = 0.5"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Torch Definitions"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class LRModel(nn.Module):\n",
+    "\n",
+    "    def __init__(self, in_features: int, out_features: int) -> None:\n",
+    "        super().__init__()\n",
+    "        self.fc = torch.nn.Linear(in_features, out_features)\n",
+    "\n",
+    "    def forward(self, x: torch.Tensor) -> torch.Tensor:\n",
+    "        return self.fc(x)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model = LRModel(NUM_FEATURES, 1)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Optimizer"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Loss function"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "loss_fn = nn.MSELoss()"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Federation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import copy"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from openfl.interface.interactive_api.experiment import TaskInterface, DataInterface, ModelInterface, FLExperiment"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Dataset"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class LRDataset(DataInterface):\n",
+    "    def __init__(self, train_bs: int = 1024, val_bs: int = 1024, **kwargs):\n",
+    "        super().__init__(**kwargs)\n",
+    "        self._train_bs = train_bs\n",
+    "        self._val_bs = val_bs\n",
+    "        self._train_data = None\n",
+    "        self._val_data = None\n",
+    "\n",
+    "    @property\n",
+    "    def shard_descriptor(self):\n",
+    "        return self._shard_descriptor\n",
+    "\n",
+    "    @shard_descriptor.setter\n",
+    "    def shard_descriptor(self, shard_descriptor):\n",
+    "        \"\"\"\n",
+    "        Describe per-collaborator procedures or sharding.\n",
+    "\n",
+    "        This method will be called during a collaborator initialization.\n",
+    "        Local shard_descriptor will be set by Envoy.\n",
+    "        \"\"\"\n",
+    "        self._shard_descriptor = shard_descriptor\n",
+    "        self._train_data = self._shard_descriptor.get_dataset('train')\n",
+    "        self._val_data = self._shard_descriptor.get_dataset('val')\n",
+    "\n",
+    "    def get_train_loader(self, **kwargs):\n",
+    "        \"\"\"\n",
+    "        Output of this method will be provided to tasks with optimizer in contract\n",
+    "        \"\"\"\n",
+    "        if self._train_data is None:\n",
+    "            raise ValueError(\"train data is not set\")\n",
+    "        return torch.utils.data.DataLoader(self._train_data, batch_size=self._train_bs, shuffle=True)\n",
+    "\n",
+    "    def get_valid_loader(self, **kwargs):\n",
+    "        \"\"\"\n",
+    "        Output of this method will be provided to tasks without optimizer in contract\n",
+    "        \"\"\"\n",
+    "        if self._val_data is None:\n",
+    "            raise ValueError(\"validation data is not set\")\n",
+    "        return torch.utils.data.DataLoader(self._val_data, batch_size=self._val_bs)\n",
+    "\n",
+    "    def get_train_data_size(self):\n",
+    "        \"\"\"\n",
+    "        Information for aggregation\n",
+    "        \"\"\"\n",
+    "        if self._train_data is None:\n",
+    "            raise ValueError(\"train data is not set\")\n",
+    "        return len(self._train_data)\n",
+    "\n",
+    "    def get_valid_data_size(self):\n",
+    "        \"\"\"\n",
+    "        Information for aggregation\n",
+    "        \"\"\"\n",
+    "        if self._val_data is None:\n",
+    "            raise ValueError(\"validation data is not set\")\n",
+    "        return len(self._val_data)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fl_dataset = LRDataset()"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Register model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "framework_adapter = 'openfl.plugins.frameworks_adapters.pytorch_adapter.FrameworkAdapterPlugin'\n",
+    "model_interface = ModelInterface(model=model, optimizer=optimizer, framework_plugin=framework_adapter)\n",
+    "\n",
+    "# Save the initial model state\n",
+    "initial_model = copy.deepcopy(model)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Register tasks"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "task_interface = TaskInterface()\n",
+    "\n",
+    "# Task interface currently supports only standalone functions.\n",
+    "@task_interface.add_kwargs(**{'loss_fn': loss_fn})\n",
+    "@task_interface.register_fl_task(model='model', data_loader='train_loader', device='device', optimizer='optimizer')\n",
+    "def train(model, train_loader, optimizer, device, loss_fn):\n",
+    "    model.to(device)\n",
+    "    model.train()\n",
+    "\n",
+    "    losses = []\n",
+    "    for data in train_loader:\n",
+    "        data = data.to(device)\n",
+    "        optimizer.zero_grad()\n",
+    "        loss = loss_fn(model(data[:,:NUM_FEATURES]), data[:,NUM_FEATURES:])\n",
+    "        loss.backward()\n",
+    "        optimizer.step()\n",
+    "        losses.append(loss.detach().cpu().numpy())\n",
+    "\n",
+    "    return {'train_mse': np.mean(losses)}\n",
+    "\n",
+    "\n",
+    "@task_interface.add_kwargs(**{'loss_fn': loss_fn})\n",
+    "@task_interface.register_fl_task(model='model', data_loader='val_loader', device='device')\n",
+    "def validate(model, val_loader, device, loss_fn):\n",
+    "    model.to(device)\n",
+    "    model.eval()\n",
+    "\n",
+    "    losses = []\n",
+    "    with torch.no_grad():\n",
+    "        for data in val_loader:\n",
+    "            data = data.to(device)\n",
+    "            loss = loss_fn(model(data[:,:NUM_FEATURES]), data[:,NUM_FEATURES:])\n",
+    "            losses.append(loss.detach().cpu().numpy())\n",
+    "\n",
+    "    return {'val_mse': np.mean(losses)}"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Create Federation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from openfl.interface.interactive_api.federation import Federation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# please use the same identifier that was used in the signed certificate\n",
+    "client_id = 'frontend'\n",
+    "director_node_fqdn = 'localhost'\n",
+    "director_port = 50050\n",
+    "\n",
+    "federation = Federation(\n",
+    "    client_id=client_id,\n",
+    "    director_node_fqdn=director_node_fqdn,\n",
+    "    director_port=director_port,\n",
+    "    tls=False\n",
+    ")"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Run Federation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# create an experiment in the federation\n",
+    "experiment_name = 'torch_linear_regression_experiment'\n",
+    "fl_experiment = FLExperiment(federation=federation, experiment_name=experiment_name)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# The following command zips the workspace and python requirements to be transferred to collaborator nodes\n",
+    "fl_experiment.start(\n",
+    "    model_provider=model_interface,\n",
+    "    task_keeper=task_interface,\n",
+    "    data_loader=fl_dataset,\n",
+    "    rounds_to_train=10\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fl_experiment.stream_metrics()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "osh",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.16"
+  },
+  "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
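
Once the experiment above finishes, the aggregated model can be pulled back into the workspace notebook for a quick local check. The cell below is a minimal sketch rather than part of the patch files: it assumes the `FLExperiment.get_best_model()` helper available in recent OpenFL releases and reuses `initial_model`, `loss_fn`, and `NUM_FEATURES` defined earlier in the notebook; the test data is simply regenerated with the same parameters as `regression_shard_descriptor.py`.

```python
# Extra notebook cell: compare the initial and the federated model on locally
# regenerated test data (same make_regression parameters as the shard descriptor).
import numpy as np
import torch
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split

best_model = fl_experiment.get_best_model()  # assumed helper; some releases also offer get_last_model()

x, y = make_regression(n_samples=1000, n_features=1, noise=14, random_state=24)
_, X_test, _, y_test = train_test_split(x, y, random_state=24)
data = torch.tensor(np.concatenate((X_test, y_test[:, None]), axis=1), dtype=torch.float32)

def mse(model):
    model.eval()
    with torch.no_grad():
        return loss_fn(model(data[:, :NUM_FEATURES]), data[:, NUM_FEATURES:]).item()

print(f'initial model MSE: {mse(initial_model):.2f}')
print(f'trained model MSE: {mse(best_model):.2f}')
```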