From 8097342cfea0a3c29bbb21d6aa1c910c9e379b78 Mon Sep 17 00:00:00 2001
From: "He, Dan H"
Date: Wed, 26 Apr 2023 18:12:57 +0800
Subject: [PATCH] Add PyTorch linear regression example

This adds a new tutorial example on distributing a linear regression task
over an OpenFL cluster. The model is defined in PyTorch and can run on both
CPU (the default) and GPU. The dataset is generated by make_regression from
sklearn.datasets with pre-defined parameters.

Fixes 797

Co-authored-by: Jiang, Jiaqiu
Signed-off-by: He, Dan H
Signed-off-by: Jiang, Jiaqiu
Signed-off-by: Li, Qingqing
Signed-off-by: Wang, Le
Signed-off-by: Wu, Caili
---
 .../PyTorch_LinearRegression/README.md        |  56 +++
 .../director/director_config.yaml             |   6 +
 .../director/start_director.sh                |   4 +
 .../envoy/envoy_config.yaml                   |   9 +
 .../envoy/regression_shard_descriptor.py      |  71 ++++
 .../envoy/requirements.txt                    |   7 +
 .../envoy/start_envoy.sh                      |   6 +
 .../workspace/requirements.txt                |   7 +
 .../workspace/torch_linear_regression.ipynb   | 388 ++++++++++++++++++
 9 files changed, 554 insertions(+)
 create mode 100644 openfl-tutorials/interactive_api/PyTorch_LinearRegression/README.md
 create mode 100644 openfl-tutorials/interactive_api/PyTorch_LinearRegression/director/director_config.yaml
 create mode 100755 openfl-tutorials/interactive_api/PyTorch_LinearRegression/director/start_director.sh
 create mode 100644 openfl-tutorials/interactive_api/PyTorch_LinearRegression/envoy/envoy_config.yaml
 create mode 100644 openfl-tutorials/interactive_api/PyTorch_LinearRegression/envoy/regression_shard_descriptor.py
 create mode 100644 openfl-tutorials/interactive_api/PyTorch_LinearRegression/envoy/requirements.txt
 create mode 100755 openfl-tutorials/interactive_api/PyTorch_LinearRegression/envoy/start_envoy.sh
 create mode 100644 openfl-tutorials/interactive_api/PyTorch_LinearRegression/workspace/requirements.txt
 create mode 100644 openfl-tutorials/interactive_api/PyTorch_LinearRegression/workspace/torch_linear_regression.ipynb

diff --git a/openfl-tutorials/interactive_api/PyTorch_LinearRegression/README.md b/openfl-tutorials/interactive_api/PyTorch_LinearRegression/README.md
new file mode 100644
index 00000000000..84c75e5369c
--- /dev/null
+++ b/openfl-tutorials/interactive_api/PyTorch_LinearRegression/README.md
@@ -0,0 +1,56 @@
+# PyTorch based Linear Regression Tutorial
+
+### 1. About dataset
+
+Generate a random regression problem using `make_regression` from sklearn.datasets with pre-defined parameters (see the reference snippet at the end of this README).
+
+Define the below param in the `envoy_config.yaml` file to shard the dataset across participants/envoys:
+- rank_worldsize
+
+
+### 2. About model
+
+A simple linear regression model implemented in PyTorch.
+
+
+### 3. How to run this tutorial (without TLS and locally as a simulation):
+
+1. Run director:
+
+```sh
+cd director_folder
+./start_director.sh
+```
+
+2. Run envoy:
+
+Step 1: Activate the virtual environment and install packages
+```sh
+cd envoy_folder
+pip install -r requirements.txt
+```
+Step 2: Start the envoy
+```sh
+./start_envoy.sh env_instance_1 envoy_config.yaml
+```
+
+Optional: start a second envoy:
+
+- Copy `envoy_folder` to another place and follow the same process as above:
+
+```sh
+./start_envoy.sh env_instance_2 envoy_config.yaml
+```
+
+3. Run the `torch_linear_regression.ipynb` jupyter notebook:
+
+```sh
+cd workspace
+jupyter lab torch_linear_regression.ipynb
+```
+
+4. Visualization:
+
+```sh
+tensorboard --logdir logs/
+```
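+
+### 4. Dataset generation reference
+
+For reference, a minimal sketch of the data each envoy trains on. The `make_regression` and `train_test_split` parameters below mirror the ones hard-coded in `envoy/regression_shard_descriptor.py`; the slicing at the end is only an illustration of how `rank_worldsize` shards the samples (shown here for the first of two envoys).
+
+```python
+import numpy as np
+from sklearn.datasets import make_regression
+from sklearn.model_selection import train_test_split
+
+# Same parameters as RegressionShardDescriptor.generate_data()
+x, y = make_regression(n_samples=1000, n_features=1, noise=14, random_state=24)
+X_train, X_test, y_train, y_test = train_test_split(x, y, random_state=24)
+
+# Each envoy keeps every `worldsize`-th sample, offset by its (rank - 1),
+# e.g. `rank_worldsize: 1, 2` keeps samples 0, 2, 4, ... of the train split.
+rank, worldsize = 1, 2
+train_shard = np.concatenate((X_train, y_train[:, None]), axis=1)[rank - 1::worldsize]
+print(train_shard.shape)  # (375, 2): feature in column 0, target in column 1
+```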
diff --git a/openfl-tutorials/interactive_api/PyTorch_LinearRegression/director/director_config.yaml b/openfl-tutorials/interactive_api/PyTorch_LinearRegression/director/director_config.yaml
new file mode 100644
index 00000000000..d22b4b77661
--- /dev/null
+++ b/openfl-tutorials/interactive_api/PyTorch_LinearRegression/director/director_config.yaml
@@ -0,0 +1,6 @@
+settings:
+  listen_host: localhost
+  listen_port: 50050
+  sample_shape: ['1'] # Modify this param if experimenting with `n_features` of shard_descriptor.
+  target_shape: ['1']
+  envoy_health_check_period: 5 # in seconds
diff --git a/openfl-tutorials/interactive_api/PyTorch_LinearRegression/director/start_director.sh b/openfl-tutorials/interactive_api/PyTorch_LinearRegression/director/start_director.sh
new file mode 100755
index 00000000000..5806a6cc0a7
--- /dev/null
+++ b/openfl-tutorials/interactive_api/PyTorch_LinearRegression/director/start_director.sh
@@ -0,0 +1,4 @@
+#!/bin/bash
+set -e
+
+fx director start --disable-tls -c director_config.yaml
\ No newline at end of file
diff --git a/openfl-tutorials/interactive_api/PyTorch_LinearRegression/envoy/envoy_config.yaml b/openfl-tutorials/interactive_api/PyTorch_LinearRegression/envoy/envoy_config.yaml
new file mode 100644
index 00000000000..8f35387ca8d
--- /dev/null
+++ b/openfl-tutorials/interactive_api/PyTorch_LinearRegression/envoy/envoy_config.yaml
@@ -0,0 +1,9 @@
+params:
+  cuda_devices: []
+
+optional_plugin_components: {}
+
+shard_descriptor:
+  template: regression_shard_descriptor.RegressionShardDescriptor
+  params:
+    rank_worldsize: 1, 2
\ No newline at end of file
diff --git a/openfl-tutorials/interactive_api/PyTorch_LinearRegression/envoy/regression_shard_descriptor.py b/openfl-tutorials/interactive_api/PyTorch_LinearRegression/envoy/regression_shard_descriptor.py
new file mode 100644
index 00000000000..6aa35cbae5f
--- /dev/null
+++ b/openfl-tutorials/interactive_api/PyTorch_LinearRegression/envoy/regression_shard_descriptor.py
@@ -0,0 +1,71 @@
+# Copyright (C) 2020-2022 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+"""Regression Shard Descriptor."""
+
+from typing import List
+
+import numpy as np
+import torch
+from sklearn.datasets import make_regression
+from sklearn.model_selection import train_test_split
+
+from openfl.interface.interactive_api.shard_descriptor import ShardDescriptor
+
+
+class RegressionShardDescriptor(ShardDescriptor):
+    """Regression Shard descriptor class."""
+
+    def __init__(self, rank_worldsize: str = '1, 1', **kwargs) -> None:
+        """
+        Initialize Regression Data Shard Descriptor.
+
+        This Shard Descriptor generates random regression data with Gaussian-centered
+        noise using the make_regression method from sklearn.datasets.
+        Shards data across participants using rank and world size.
+        """
+        self.rank, self.worldsize = tuple(int(num) for num in rank_worldsize.split(','))
+        X_train, y_train, X_test, y_test = self.generate_data()
+        self.data_by_type = {
+            'train': np.concatenate((X_train, y_train[:, None]), axis=1),
+            'val': np.concatenate((X_test, y_test[:, None]), axis=1)
+        }
+
+    def generate_data(self):
+        """Generate regression dataset with predefined params."""
+        x, y = make_regression(n_samples=1000, n_features=1, noise=14, random_state=24)
+        X_train, X_test, y_train, y_test = train_test_split(x, y, random_state=24)
+        self.data = np.concatenate((x, y[:, None]), axis=1)
+        return X_train, y_train, X_test, y_test
+
+    def get_shard_dataset_types(self) -> List[str]:
+        """Get available shard dataset types."""
+        return list(self.data_by_type)
+
+    def get_dataset(self, dataset_type='train'):
+        """Return a shard dataset by type."""
+        if dataset_type not in self.data_by_type:
+            raise ValueError(f'Incorrect dataset type: {dataset_type}')
+        shard = self.data_by_type[dataset_type][self.rank - 1::self.worldsize]
+        return torch.tensor(shard, dtype=torch.float32)
+
+    @property
+    def sample_shape(self) -> List[str]:
+        """Return the sample shape info."""
+        (*x, _) = self.data[0]
+        return [str(i) for i in np.array(x, ndmin=1).shape]
+
+    @property
+    def target_shape(self) -> List[str]:
+        """Return the target shape info."""
+        (*_, y) = self.data[0]
+        return [str(i) for i in np.array(y, ndmin=1).shape]
+
+    @property
+    def dataset_description(self) -> str:
+        """Return the dataset description."""
+        return (f'Regression dataset, shard number {self.rank}'
+                f' out of {self.worldsize}')
diff --git a/openfl-tutorials/interactive_api/PyTorch_LinearRegression/envoy/requirements.txt b/openfl-tutorials/interactive_api/PyTorch_LinearRegression/envoy/requirements.txt
new file mode 100644
index 00000000000..0baaf04f874
--- /dev/null
+++ b/openfl-tutorials/interactive_api/PyTorch_LinearRegression/envoy/requirements.txt
@@ -0,0 +1,7 @@
+openfl>=1.2.1
+numpy>=1.13.3
+torch>=1.13.1
+scikit-learn>=0.24.1
+mistune>=2.0.3 # not directly required, pinned by Snyk to avoid a vulnerability
+setuptools>=65.5.1 # not directly required, pinned by Snyk to avoid a vulnerability
+wheel>=0.38.0 # not directly required, pinned by Snyk to avoid a vulnerability
diff --git a/openfl-tutorials/interactive_api/PyTorch_LinearRegression/envoy/start_envoy.sh b/openfl-tutorials/interactive_api/PyTorch_LinearRegression/envoy/start_envoy.sh
new file mode 100755
index 00000000000..4da07821af4
--- /dev/null
+++ b/openfl-tutorials/interactive_api/PyTorch_LinearRegression/envoy/start_envoy.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+set -e
+ENVOY_NAME=$1
+ENVOY_CONF=$2
+
+fx envoy start -n "$ENVOY_NAME" --disable-tls --envoy-config-path "$ENVOY_CONF" -dh localhost -dp 50050
diff --git a/openfl-tutorials/interactive_api/PyTorch_LinearRegression/workspace/requirements.txt b/openfl-tutorials/interactive_api/PyTorch_LinearRegression/workspace/requirements.txt
new file mode 100644
index 00000000000..f2177597843
--- /dev/null
+++ b/openfl-tutorials/interactive_api/PyTorch_LinearRegression/workspace/requirements.txt
@@ -0,0 +1,7 @@
+openfl>=1.2.1
+numpy>=1.13.3
+torch>=1.13.1
+scikit-learn>=0.24.1
+jupyterlab
+setuptools>=65.5.1 # not directly required, pinned by Snyk to avoid a vulnerability
+wheel>=0.38.0 # not directly required, pinned by Snyk to avoid a vulnerability
diff --git a/openfl-tutorials/interactive_api/PyTorch_LinearRegression/workspace/torch_linear_regression.ipynb b/openfl-tutorials/interactive_api/PyTorch_LinearRegression/workspace/torch_linear_regression.ipynb
new file mode 100644
index 00000000000..d7ad3e310f6
--- /dev/null
+++ b/openfl-tutorials/interactive_api/PyTorch_LinearRegression/workspace/torch_linear_regression.ipynb
@@ -0,0 +1,388 @@
+{
+ "cells": [
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Torch Regression Example - Interactive API"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import torch\n",
+    "import torch.nn as nn\n",
+    "import torch.optim as optim"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "NUM_FEATURES = 1\n",
+    "LEARNING_RATE = 0.5"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Torch Definitions"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class LRModel(nn.Module):\n",
+    "\n",
+    "    def __init__(self, in_features: int, out_features: int) -> None:\n",
+    "        super().__init__()\n",
+    "        self.fc = torch.nn.Linear(in_features, out_features)\n",
+    "\n",
+    "    def forward(self, x: torch.Tensor) -> torch.Tensor:\n",
+    "        return self.fc(x)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model = LRModel(NUM_FEATURES, 1)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Optimizer"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Loss function"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "loss_fn = nn.MSELoss()"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Federation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import copy"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from openfl.interface.interactive_api.experiment import TaskInterface, DataInterface, ModelInterface, FLExperiment"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Dataset"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class LRDataset(DataInterface):\n",
+    "    def __init__(self, train_bs: int = 1024, val_bs: int = 1024, **kwargs):\n",
+    "        super().__init__(**kwargs)\n",
+    "        self._train_bs = train_bs\n",
+    "        self._val_bs = val_bs\n",
+    "        self._train_data = None\n",
+    "        self._val_data = None\n",
+    "\n",
+    "    @property\n",
+    "    def shard_descriptor(self):\n",
+    "        return self._shard_descriptor\n",
+    "\n",
+    "    @shard_descriptor.setter\n",
+    "    def shard_descriptor(self, shard_descriptor):\n",
+    "        \"\"\"\n",
+    "        Describe per-collaborator procedures or sharding.\n",
+    "\n",
+    "        This method will be called during a collaborator initialization.\n",
+    "        Local shard_descriptor will be set by Envoy.\n",
+    "        \"\"\"\n",
+    "        self._shard_descriptor = shard_descriptor\n",
+    "        self._train_data = self._shard_descriptor.get_dataset('train')\n",
+    "        self._val_data = self._shard_descriptor.get_dataset('val')\n",
+    "\n",
+    "    def get_train_loader(self, **kwargs):\n",
+    "        \"\"\"\n",
+    "        Output of this method will be provided to tasks with optimizer in contract\n",
+    "        \"\"\"\n",
+    "        if self._train_data is None:\n",
+    "            raise ValueError(\"train data is not set\")\n",
+    "        return torch.utils.data.DataLoader(self._train_data, batch_size=self._train_bs, shuffle=True)\n",
+    "\n",
+    "    def get_valid_loader(self, **kwargs):\n",
+    "        \"\"\"\n",
+    "        Output of this method will be provided to tasks without optimizer in contract\n",
+    "        \"\"\"\n",
+    "        if self._val_data is None:\n",
+    "            raise ValueError(\"validation data is not set\")\n",
+    "        return torch.utils.data.DataLoader(self._val_data, batch_size=self._val_bs)\n",
+    "\n",
+    "    def get_train_data_size(self):\n",
+    "        \"\"\"\n",
+    "        Information for aggregation\n",
+    "        \"\"\"\n",
+    "        if self._train_data is None:\n",
+    "            raise ValueError(\"train data is not set\")\n",
+    "        return len(self._train_data)\n",
+    "\n",
+    "    def get_valid_data_size(self):\n",
+    "        \"\"\"\n",
+    "        Information for aggregation\n",
+    "        \"\"\"\n",
+    "        if self._val_data is None:\n",
+    "            raise ValueError(\"validation data is not set\")\n",
+    "        return len(self._val_data)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fl_dataset = LRDataset()"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Register model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "framework_adapter = 'openfl.plugins.frameworks_adapters.pytorch_adapter.FrameworkAdapterPlugin'\n",
+    "model_interface = ModelInterface(model=model, optimizer=optimizer, framework_plugin=framework_adapter)\n",
+    "\n",
+    "# Save the initial model state\n",
+    "initial_model = copy.deepcopy(model)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Register tasks"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "task_interface = TaskInterface()\n",
+    "\n",
+    "# Task interface currently supports only standalone functions.\n",
+    "@task_interface.add_kwargs(**{'loss_fn': loss_fn})\n",
+    "@task_interface.register_fl_task(model='model', data_loader='train_loader', device='device', optimizer='optimizer')\n",
+    "def train(model, train_loader, optimizer, device, loss_fn):\n",
+    "    model.to(device)\n",
+    "    model.train()\n",
+    "\n",
+    "    losses = []\n",
+    "    for data in train_loader:\n",
+    "        data = data.to(device)\n",
+    "        optimizer.zero_grad()\n",
+    "        loss = loss_fn(model(data[:,:NUM_FEATURES]), data[:,NUM_FEATURES:])\n",
+    "        loss.backward()\n",
+    "        optimizer.step()\n",
+    "        losses.append(loss.detach().cpu().numpy())\n",
+    "\n",
+    "    return {'train_mse': np.mean(losses)}\n",
+    "\n",
+    "\n",
+    "@task_interface.add_kwargs(**{'loss_fn': loss_fn})\n",
+    "@task_interface.register_fl_task(model='model', data_loader='val_loader', device='device')\n",
+    "def validate(model, val_loader, device, loss_fn):\n",
+    "    model.to(device)\n",
+    "    model.eval()\n",
+    "\n",
+    "    losses = []\n",
+    "    with torch.no_grad():\n",
+    "        for data in val_loader:\n",
+    "            data = data.to(device)\n",
+    "            loss = loss_fn(model(data[:,:NUM_FEATURES]), data[:,NUM_FEATURES:])\n",
+    "            losses.append(loss.detach().cpu().numpy())\n",
+    "\n",
+    "    return {'val_mse': np.mean(losses)}"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Create Federation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from openfl.interface.interactive_api.federation import Federation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# please use the same identifier that was used in the signed certificate\n",
+    "client_id = 'frontend'\n",
+    "director_node_fqdn = 'localhost'\n",
+    "director_port = 50050\n",
+    "\n",
+    "federation = Federation(\n",
+    "    client_id=client_id,\n",
+    "    director_node_fqdn=director_node_fqdn,\n",
+    "    director_port=director_port,\n",
+    "    tls=False\n",
+    ")"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Run Federation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# create an experiment in the federation\n",
+    "experiment_name = 'torch_linear_regression_experiment'\n",
+    "fl_experiment = FLExperiment(federation=federation, experiment_name=experiment_name)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# The following command zips the workspace and python requirements to be transferred to collaborator nodes\n",
+    "fl_experiment.start(\n",
+    "    model_provider=model_interface,\n",
+    "    task_keeper=task_interface,\n",
+    "    data_loader=fl_dataset,\n",
+    "    rounds_to_train=10\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fl_experiment.stream_metrics()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "osh",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.16"
+  },
+  "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
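
Once the experiment above finishes, the aggregated model can be pulled back into the workspace notebook for a quick local check. The cell below is a minimal sketch rather than part of the patch files: it assumes the `FLExperiment.get_best_model()` helper available in recent OpenFL releases and reuses `initial_model`, `loss_fn`, and `NUM_FEATURES` defined earlier in the notebook; the test data is simply regenerated with the same parameters as `regression_shard_descriptor.py`.

```python
# Extra notebook cell: compare the initial and the federated model on locally
# regenerated test data (same make_regression parameters as the shard descriptor).
import numpy as np
import torch
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split

best_model = fl_experiment.get_best_model()  # assumed helper; some releases also offer get_last_model()

x, y = make_regression(n_samples=1000, n_features=1, noise=14, random_state=24)
_, X_test, _, y_test = train_test_split(x, y, random_state=24)
data = torch.tensor(np.concatenate((X_test, y_test[:, None]), axis=1), dtype=torch.float32)

def mse(model):
    model.eval()
    with torch.no_grad():
        return loss_fn(model(data[:, :NUM_FEATURES]), data[:, NUM_FEATURES:]).item()

print(f'initial model MSE: {mse(initial_model):.2f}')
print(f'trained model MSE: {mse(best_model):.2f}')
```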