From 629909aa1b3a956015b0a2b8a0501cd5ec1f0b52 Mon Sep 17 00:00:00 2001 From: "Brian E. Granger" Date: Thu, 20 Apr 2023 08:46:13 -0700 Subject: [PATCH] Create /autonotebook command for AI generated notebooks (#90) * Initial autonotebook work. * Working autonotebook. * Adding first autogenerated notebook example. * Removing file. * Adding second autonotebook example. * Cleaning up code, renaming autonotebook to generate. * Minor fixes, adding new example notebook. * Renaming examples subdir. --- .../Creating Random Arrays with Numpy.ipynb | 360 ++++++++++++ examples/generate/Python Sets Tutorial.ipynb | 555 ++++++++++++++++++ ...ng Dense Neural Network with PyTorch.ipynb | 404 +++++++++++++ packages/jupyter-ai/jupyter_ai/actors/base.py | 4 +- .../jupyter-ai/jupyter_ai/actors/generate.py | 233 ++++++++ packages/jupyter-ai/jupyter_ai/extension.py | 8 + 6 files changed, 1563 insertions(+), 1 deletion(-) create mode 100644 examples/generate/Creating Random Arrays with Numpy.ipynb create mode 100644 examples/generate/Python Sets Tutorial.ipynb create mode 100644 examples/generate/Training Dense Neural Network with PyTorch.ipynb create mode 100644 packages/jupyter-ai/jupyter_ai/actors/generate.py diff --git a/examples/generate/Creating Random Arrays with Numpy.ipynb b/examples/generate/Creating Random Arrays with Numpy.ipynb new file mode 100644 index 000000000..efd540f3d --- /dev/null +++ b/examples/generate/Creating Random Arrays with Numpy.ipynb @@ -0,0 +1,360 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "c46fd5c5", + "metadata": {}, + "source": [ + "# Creating Random Arrays with Numpy" + ] + }, + { + "cell_type": "markdown", + "id": "a2a4149f", + "metadata": {}, + "source": [ + "## Introduction" + ] + }, + { + "cell_type": "markdown", + "id": "4b488198", + "metadata": {}, + "source": [ + "This notebook was created by [Jupyter AI](https://github.com/jupyterlab/jupyter-ai) with the following prompt:\n", + "\n", + "> /generate Create a Jupyter notebook 
that shows how to create a random array using numpy." + ] + }, + { + "cell_type": "markdown", + "id": "c6460605", + "metadata": {}, + "source": [ + "This Jupyter notebook demonstrates how to create a random array using numpy. It covers topics such as importing necessary packages, creating a random array, setting the array size and shape, setting the data type of the array, and generating a random array with specified parameters. Each section includes sample code for creating a random array and printing the results. This notebook is useful for anyone looking to generate random arrays in their data analysis or machine learning projects." + ] + }, + { + "cell_type": "markdown", + "id": "9cfcc84c", + "metadata": {}, + "source": [ + "## Creating a random array" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "4c50ec33", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "02a9481d", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "np.random.seed(123)\n", + "random_array = np.random.rand(3, 4)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "5ed2a6c8", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Random array:\n", + " [[0.69646919 0.28613933 0.22685145 0.55131477]\n", + " [0.71946897 0.42310646 0.9807642 0.68482974]\n", + " [0.4809319 0.39211752 0.34317802 0.72904971]]\n" + ] + } + ], + "source": [ + "print(\"Random array:\\n\", random_array)" + ] + }, + { + "cell_type": "markdown", + "id": "2e9d4225", + "metadata": {}, + "source": [ + "## Setting the array size and shape" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "15bfe3cb", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "2aedfee5", + "metadata": { + "tags": 
[] + }, + "outputs": [], + "source": [ + "# Set the size and shape of the random array\n", + "array_size = (3, 4) # number of rows and columns" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "6dc9a89a", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Create the random array using the specified size and shape\n", + "random_array = np.random.rand(*array_size) # *array_size unpacks the tuple" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "7b4b2ae5", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Random array:\n", + " [[0.43857224 0.0596779 0.39804426 0.73799541]\n", + " [0.18249173 0.17545176 0.53155137 0.53182759]\n", + " [0.63440096 0.84943179 0.72445532 0.61102351]]\n" + ] + } + ], + "source": [ + "# Print the random array\n", + "print(\"Random array:\\n\", random_array)" + ] + }, + { + "cell_type": "markdown", + "id": "863dd179", + "metadata": {}, + "source": [ + "## Setting the data type of the array" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "fed55a87", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "d2fa2a10", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Set the data type of the random array to be created\n", + "dtype = np.int32" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "9c462fdb", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Set the size and shape of the random array\n", + "array_size = (3, 4) # number of rows and columns" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "ddcf206e", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Create the random array using the specified size, shape, and data type\n", + "random_array = np.random.randint(low=0, high=10, size=array_size, 
dtype=dtype)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "fcc3d78c", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Random array:\n", + " [[4 6 1 5]\n", + " [6 2 1 8]\n", + " [3 5 0 2]]\n" + ] + } + ], + "source": [ + "# Print the random array\n", + "print(\"Random array:\\n\", random_array)" + ] + }, + { + "cell_type": "markdown", + "id": "f1c81186", + "metadata": {}, + "source": [ + "## Generating a random array with specified parameters" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "b0526789", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "9ebd784c", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "array_size = (5, 7) \n", + "min_val = -10\n", + "max_val = 10" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "56567059", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "def create_random_array(size, low, high):\n", + " return np.random.randint(low=low, high=high, size=size)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "57c8282d", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "random_array = create_random_array(array_size, min_val, max_val)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "a2c9d87f", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Random array:\n", + " [[ 0 3 8 -6 5 1 2]\n", + " [-4 3 9 6 -4 4 -3]\n", + " [ 1 -3 -9 1 -5 8 7]\n", + " [ 2 8 7 -9 9 2 -1]\n", + " [ 6 7 -7 -7 1 -3 -1]]\n" + ] + } + ], + "source": [ + "print(\"Random array:\\n\", random_array)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0abd2b89-c2e1-4083-9d4a-29da5a2096c3", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": 
{ + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.16" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/generate/Python Sets Tutorial.ipynb b/examples/generate/Python Sets Tutorial.ipynb new file mode 100644 index 000000000..4c7421f25 --- /dev/null +++ b/examples/generate/Python Sets Tutorial.ipynb @@ -0,0 +1,555 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "85d698ab", + "metadata": {}, + "source": [ + "# Python Sets Tutorial" + ] + }, + { + "cell_type": "markdown", + "id": "fd6a12e2-60d7-40a2-848a-9599104bba0d", + "metadata": {}, + "source": [ + "## Introduction" + ] + }, + { + "cell_type": "markdown", + "id": "c3640fd4", + "metadata": {}, + "source": [ + "This notebook was created by [Jupyter AI](https://github.com/jupyterlab/jupyter-ai) with the following prompt:\n", + "\n", + "> A Python notebook that teaches how to use sets." + ] + }, + { + "cell_type": "markdown", + "id": "7fe7f535", + "metadata": {}, + "source": [ + "This Jupyter notebook teaches how to use sets in Python, covering topics such as creating a set, adding and removing elements from a set, set operations such as union, intersection, and difference, set comprehensions, and frozen sets. Each section includes improved and validated code examples to demonstrate the concepts. The notebook provides a comprehensive guide to working with sets in Python." 
+ ] + }, + { + "cell_type": "markdown", + "id": "43923cd7", + "metadata": {}, + "source": [ + "## Adding Elements to a Set" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "c3eae75f", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{1, 2, 3, 4, 5, 6}\n" + ] + } + ], + "source": [ + "# Improved and Valid Code for \"Adding Elements to a Set\" section:\n", + "# To add an element to a set, use the add() method.\n", + "# Here is an example:\n", + "my_set = {1, 2, 3, 4, 5}\n", + "my_set.add(6)\n", + "print(my_set) # Output: {1, 2, 3, 4, 5, 6}" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "f6e3726d", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{1, 2, 3, 4, 5, 6, 7, 8, 9}\n" + ] + } + ], + "source": [ + "# You can also add multiple elements to a set using the update() method.\n", + "# The update() method takes an iterable as its argument.\n", + "# Here is an example:\n", + "my_set.update([7, 8, 9])\n", + "print(my_set) # Output: {1, 2, 3, 4, 5, 6, 7, 8, 9}" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "39352fae", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{1, 2, 3, 4, 5, 6, 7, 8, 9}\n" + ] + } + ], + "source": [ + "# You can also add multiple elements to a set using the update() method.\n", + "# The update() method takes an iterable as its argument.\n", + "# Here is an example:\n", + "my_set.update([7, 8, 9])\n", + "print(my_set) # Output: {1, 2, 3, 4, 5, 6, 7, 8, 9}" + ] + }, + { + "cell_type": "markdown", + "id": "0c401d3d", + "metadata": {}, + "source": [ + "## Removing Elements from a Set" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "625ae27f", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Improved and validated code for \"Removing Elements from a 
Set\" section:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "a02781a0", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Define a set\n", + "my_set = {1, 2, 3, 4, 5, 6, 7, 8, 9}" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "b0cf21d4", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{1, 2, 3, 4, 6, 7, 8, 9}\n" + ] + } + ], + "source": [ + "# Remove an element from the set using the remove() method\n", + "my_set.remove(5)\n", + "print(my_set) # Output: {1, 2, 3, 4, 6, 7, 8, 9}" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "c99bd165", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{1, 2, 3, 4, 6, 7, 8, 9}\n" + ] + } + ], + "source": [ + "# Remove an element from the set using the discard() method\n", + "# The difference between remove() and discard() is that discard() does not raise an error if the element is not in the set.\n", + "my_set.discard(10)\n", + "print(my_set) # Output: {1, 2, 3, 4, 6, 7, 8, 9}" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "360ce05d", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1\n", + "{2, 3, 4, 6, 7, 8, 9}\n" + ] + } + ], + "source": [ + "# Remove an element from the set using the pop() method\n", + "# The pop() method removes and returns an arbitrary element from the set.\n", + "removed_element = my_set.pop()\n", + "print(removed_element) # Output: 1\n", + "print(my_set) # Output: {2, 3, 4, 6, 7, 8, 9}" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "c9122fed", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2\n", + "{3, 4, 6, 7, 8, 9}\n" + ] + } + ], + "source": [ + "# Remove an element from the set using the pop() method\n", + "# The 
pop() method removes and returns an arbitrary element from the set.\n", + "removed_element = my_set.pop()\n", + "print(removed_element) # Output: 1\n", + "print(my_set) # Output: {2, 3, 4, 6, 7, 8, 9}" + ] + }, + { + "cell_type": "markdown", + "id": "8b4a3e81", + "metadata": {}, + "source": [ + "## Set Operations" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "2eb675ba", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Improved and Valid Code for the \"Set Operations\" section:" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "4f771bfc", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Union Set: {1, 2, 3, 4, 5}\n" + ] + } + ], + "source": [ + "# To perform set operations such as union, intersection, and difference, use the corresponding methods.\n", + "# Union: the union() method returns a set containing all the elements of both sets.\n", + "# Here is an example:\n", + "set1 = {1, 2, 3}\n", + "set2 = {3, 4, 5}\n", + "union_set = set1.union(set2)\n", + "print(\"Union Set: \", union_set) # Output: {1, 2, 3, 4, 5}" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "f5c72af9", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Intersection Set: {3}\n" + ] + } + ], + "source": [ + "# Intersection: the intersection() method returns a set containing only the elements that are common to both sets.\n", + "# Here is an example:\n", + "intersection_set = set1.intersection(set2)\n", + "print(\"Intersection Set: \", intersection_set) # Output: {3}" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "2b4dd0e6", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Difference Set: {1, 2}\n" + ] + } + ], + "source": [ + "# Difference: the difference() method returns a set containing the 
elements that are in the first set but not in the second set.\n", + "# Here is an example:\n", + "difference_set = set1.difference(set2)\n", + "print(\"Difference Set: \", difference_set) # Output: {1, 2}" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "ebabab91", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Difference Set using '-' operator: {1, 2}\n" + ] + } + ], + "source": [ + "# Alternatively, you can use the '-' operator to find the difference between two sets:\n", + "difference_set = set1 - set2\n", + "print(\"Difference Set using '-' operator: \", difference_set) # Output: {1, 2}" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "c6da13f8", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Difference Set using '-' operator: {1, 2}\n" + ] + } + ], + "source": [ + "# Alternatively, you can use the '-' operator to find the difference between two sets:\n", + "difference_set = set1 - set2\n", + "print(\"Difference Set using '-' operator: \", difference_set) # Output: {1, 2}" + ] + }, + { + "cell_type": "markdown", + "id": "eeea8161", + "metadata": {}, + "source": [ + "## Set Comprehensions" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "7b3766a7", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Improved and Valid Code for the \"Set Comprehensions\" section:" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "193bd32f", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}\n" + ] + } + ], + "source": [ + "# Set comprehensions provide a concise way to create sets based on existing sets or other iterables.\n", + "# Here is an example:\n", + "my_set = {x for x in range(10)}\n", + "print(my_set) # Output: {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}" + ] 
+ }, + { + "cell_type": "code", + "execution_count": 18, + "id": "1508ecc7", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{0, 2, 4, 6, 8}\n" + ] + } + ], + "source": [ + "# You can also use conditionals in set comprehensions to filter the elements.\n", + "# Here is an example:\n", + "my_set = {x for x in range(10) if x % 2 == 0}\n", + "print(my_set) # Output: {0, 2, 4, 6, 8}" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "ec24164a", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{0, 2, 4, 6, 8}\n" + ] + } + ], + "source": [ + "# You can also use conditionals in set comprehensions to filter the elements.\n", + "# Here is an example:\n", + "my_set = {x for x in range(10) if x % 2 == 0}\n", + "print(my_set) # Output: {0, 2, 4, 6, 8}" + ] + }, + { + "cell_type": "markdown", + "id": "20ba0520", + "metadata": {}, + "source": [ + "## Frozen Sets" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c74e4700", + "metadata": {}, + "outputs": [], + "source": [ + "# Improved and validated code for the \"Frozen Sets\" section:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a4d30af2", + "metadata": {}, + "outputs": [], + "source": [ + "# A frozen set is an immutable version of a set.\n", + "# You can create a frozen set by using the frozenset() constructor.\n", + "# Here is an example:\n", + "my_set = {1, 2, 3}\n", + "frozen_set = frozenset(my_set)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a34a80bd", + "metadata": {}, + "outputs": [], + "source": [ + "# Trying to add an element to a frozen set will result in a TypeError.\n", + "# Here is an example:\n", + "try:\n", + " frozen_set.add(4)\n", + "except AttributeError as e:\n", + " print(\"Error:\", e) # Output: Error: 'frozenset' object has no attribute 'add'" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "id": "8a561b92", + "metadata": {}, + "outputs": [], + "source": [ + "# Trying to add an element to a frozen set will result in a TypeError.\n", + "# Here is an example:\n", + "try:\n", + " frozen_set.add(4)\n", + "except AttributeError as e:\n", + " print(\"Error:\", e) # Output: Error: 'frozenset' object has no attribute 'add'" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.16" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/generate/Training Dense Neural Network with PyTorch.ipynb b/examples/generate/Training Dense Neural Network with PyTorch.ipynb new file mode 100644 index 000000000..74c659a89 --- /dev/null +++ b/examples/generate/Training Dense Neural Network with PyTorch.ipynb @@ -0,0 +1,404 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "5324abdd", + "metadata": {}, + "source": [ + "# Training Dense Neural Network with PyTorch" + ] + }, + { + "cell_type": "markdown", + "id": "0ce2e9ba", + "metadata": {}, + "source": [ + "## Introduction" + ] + }, + { + "cell_type": "markdown", + "id": "1f1e8269", + "metadata": {}, + "source": [ + "This notebook was created by [Jupyter AI](https://github.com/jupyterlab/jupyter-ai) with the following prompt:\n", + "\n", + "> A Jupyter notebook on training a dense neural network with 3 layers using PyTorch." + ] + }, + { + "cell_type": "markdown", + "id": "d03c0e8d", + "metadata": {}, + "source": [ + "This Jupyter notebook covers the process of training a dense neural network with 3 layers using PyTorch. The notebook begins with importing necessary libraries and loading the dataset. 
Preprocessing the dataset and splitting it into training and validation sets is then performed. The architecture of the neural network is defined, followed by defining the loss function and optimizer. The notebook then goes on to train the neural network on the training set and validate it on the validation set. The performance of the model is evaluated on the test set and the accuracy is printed. Overall, this notebook provides a comprehensive guide to training a neural network using PyTorch." + ] + }, + { + "cell_type": "markdown", + "id": "e95c4ee9", + "metadata": {}, + "source": [ + "## Loading the dataset" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5bd9bb6c", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "63eb467c", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "b0b703e9", + "metadata": {}, + "source": [ + "## Preparing the dataset" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0753783a", + "metadata": {}, + "outputs": [], + "source": [ + "# Preprocessing the dataset\n", + "X = dataset.iloc[:, :-1].values # Extract features (all columns except the last one)\n", + "y = dataset.iloc[:, -1].values # Extract target (last column)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "311790fe", + "metadata": {}, + "outputs": [], + "source": [ + "# Encode the target variable\n", + "from sklearn.preprocessing import LabelEncoder\n", + "le = LabelEncoder()\n", + "y = le.fit_transform(y)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "597517ec", + "metadata": {}, + "outputs": [], + "source": [ + "# Split the dataset into training and validation sets\n", + "from sklearn.model_selection import train_test_split\n", + "X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=0)" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "id": "fbef2579", + "metadata": {}, + "outputs": [], + "source": [ + "# Print the shapes of the training and validation sets\n", + "print(\"Training set shape:\", X_train.shape, y_train.shape)\n", + "print(\"Validation set shape:\", X_val.shape, y_val.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "288c7b8a", + "metadata": {}, + "outputs": [], + "source": [ + "# Print the shapes of the training and validation sets\n", + "print(\"Training set shape:\", X_train.shape, y_train.shape)\n", + "print(\"Validation set shape:\", X_val.shape, y_val.shape)" + ] + }, + { + "cell_type": "markdown", + "id": "0ea5cff6", + "metadata": {}, + "source": [ + "## Defining the neural network architecture" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b1dbdf42", + "metadata": {}, + "outputs": [], + "source": [ + "# Define the neural network architecture\n", + "class NeuralNet(torch.nn.Module):\n", + " def __init__(self):\n", + " super(NeuralNet, self).__init__()\n", + " self.layer1 = torch.nn.Linear(4, 10) # First layer with 4 input features and 10 output features\n", + " self.layer2 = torch.nn.Linear(10, 5) # Second layer with 10 input features and 5 output features\n", + " self.layer3 = torch.nn.Linear(5, 3) # Third layer with 5 input features and 3 output features\n", + " \n", + " def forward(self, x):\n", + " x = torch.relu(self.layer1(x)) # Pass input through first layer and apply ReLU activation function\n", + " x = torch.relu(self.layer2(x)) # Pass through second layer and apply ReLU activation function\n", + " x = self.layer3(x) # Pass through third layer without activation function\n", + " return x" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "057af6d9", + "metadata": {}, + "outputs": [], + "source": [ + "# Create an instance of the neural network\n", + "model = NeuralNet()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6c6f61fd", + 
"metadata": {}, + "outputs": [], + "source": [ + "# Print the neural network architecture\n", + "print(model)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8122dd4e", + "metadata": {}, + "outputs": [], + "source": [ + "# Print the neural network architecture\n", + "print(model)" + ] + }, + { + "cell_type": "markdown", + "id": "4430b6b0", + "metadata": {}, + "source": [ + "## Defining the loss function and optimizer" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e46fc0e2", + "metadata": {}, + "outputs": [], + "source": [ + "# Define the loss function and optimizer\n", + "loss_fn = torch.nn.CrossEntropyLoss()\n", + "optimizer = torch.optim.SGD(model.parameters(), lr=0.01)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b958e6bf", + "metadata": {}, + "outputs": [], + "source": [ + "# Define the loss function and optimizer\n", + "loss_fn = torch.nn.CrossEntropyLoss()\n", + "optimizer = torch.optim.SGD(model.parameters(), lr=0.01)" + ] + }, + { + "cell_type": "markdown", + "id": "b11cd335", + "metadata": {}, + "source": [ + "## Training the neural network" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "de30c01e", + "metadata": {}, + "outputs": [], + "source": [ + "# Train the neural network\n", + "num_epochs = 1000\n", + "train_losses = []\n", + "val_losses = []\n", + "for epoch in range(num_epochs):\n", + " # Training phase\n", + " inputs = torch.Tensor(X_train).float()\n", + " targets = torch.Tensor(y_train).long()\n", + " optimizer.zero_grad()\n", + " outputs = model(inputs)\n", + " loss = criterion(outputs, targets)\n", + " loss.backward()\n", + " optimizer.step()\n", + " train_losses.append(loss.item())\n", + " \n", + " # Validation phase\n", + " with torch.no_grad():\n", + " inputs = torch.Tensor(X_val).float()\n", + " targets = torch.Tensor(y_val).long()\n", + " outputs = model(inputs)\n", + " loss = criterion(outputs, targets)\n", + " 
val_losses.append(loss.item())\n", + " \n", + " # Print the training and validation loss values for every 100 epochs\n", + " if (epoch+1) % 100 == 0:\n", + " print(f\"Epoch [{epoch+1}/{num_epochs}], Training Loss: {train_losses[-1]:.4f}, Validation Loss: {val_losses[-1]:.4f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2371315c", + "metadata": {}, + "outputs": [], + "source": [ + "# Plot the training and validation loss curves\n", + "plt.plot(train_losses, label='Training loss')\n", + "plt.plot(val_losses, label='Validation loss')\n", + "plt.xlabel('Epoch')\n", + "plt.ylabel('Loss')\n", + "plt.legend()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4a468053", + "metadata": {}, + "outputs": [], + "source": [ + "# Plot the training and validation loss curves\n", + "plt.plot(train_losses, label='Training loss')\n", + "plt.plot(val_losses, label='Validation loss')\n", + "plt.xlabel('Epoch')\n", + "plt.ylabel('Loss')\n", + "plt.legend()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "53e0d55f", + "metadata": {}, + "source": [ + "## Evaluating the performance of the model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "eb72c861", + "metadata": {}, + "outputs": [], + "source": [ + "# Evaluate the performance of the model on the test set\n", + "# Load the test dataset\n", + "url_test = \"https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.test\" # Test dataset URL\n", + "dataset_test = pd.read_csv(url_test, skiprows=1, names=names) # Load test dataset into a Pandas data frame" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "80c7800a", + "metadata": {}, + "outputs": [], + "source": [ + "# Preprocessing the test dataset\n", + "X_test = dataset_test.iloc[:, :-1].values # Extract features (all columns except the last one)\n", + "y_test = dataset_test.iloc[:, -1].values # Extract target (last column)\n", + "y_test = 
le.transform(y_test) # Encode the target variable" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c98e1cdd", + "metadata": {}, + "outputs": [], + "source": [ + "# Convert test inputs and targets to PyTorch tensors\n", + "inputs_test = torch.Tensor(X_test).float()\n", + "targets_test = torch.Tensor(y_test).long()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3f849708", + "metadata": {}, + "outputs": [], + "source": [ + "# Evaluate the model on the test set\n", + "outputs_test = model(inputs_test) # Forward pass\n", + "_, predicted = torch.max(outputs_test, 1) # Get the predicted class for each test input\n", + "correct = (predicted == targets_test).sum().item() # Get the number of correctly predicted test inputs\n", + "accuracy = correct / len(targets_test) # Compute the accuracy" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "113d5abc", + "metadata": {}, + "outputs": [], + "source": [ + "# Print the accuracy of the model on the test set\n", + "print(f\"Accuracy on the test set: {accuracy:.4f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0064ac8f", + "metadata": {}, + "outputs": [], + "source": [ + "# Print the accuracy of the model on the test set\n", + "print(f\"Accuracy on the test set: {accuracy:.4f}\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.16" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/packages/jupyter-ai/jupyter_ai/actors/base.py b/packages/jupyter-ai/jupyter_ai/actors/base.py index 3cd9318a4..587f84560 100644 --- a/packages/jupyter-ai/jupyter_ai/actors/base.py +++ 
b/packages/jupyter-ai/jupyter_ai/actors/base.py
@@ -18,10 +18,12 @@ class ACTOR_TYPE(str, Enum):
     ASK = "ask"
     LEARN = 'learn'
     MEMORY = 'memory'
+    GENERATE = 'generate'
 
 COMMANDS = {
     '/ask': ACTOR_TYPE.ASK,
-    '/learn': ACTOR_TYPE.LEARN
+    '/learn': ACTOR_TYPE.LEARN,
+    '/generate': ACTOR_TYPE.GENERATE
 }
 
 class BaseActor():
diff --git a/packages/jupyter-ai/jupyter_ai/actors/generate.py b/packages/jupyter-ai/jupyter_ai/actors/generate.py
new file mode 100644
index 000000000..bbcdef88b
--- /dev/null
+++ b/packages/jupyter-ai/jupyter_ai/actors/generate.py
@@ -0,0 +1,233 @@
+"""Ray actor and LLM chains backing the /generate chat command.
+
+Pipeline: prompt -> outline (JSON) -> per-section code -> title/summary
+-> nbformat notebook written under the server root directory.
+"""
+import json
+import os
+import time
+from uuid import uuid4
+
+import ray
+from ray.util.queue import Queue
+
+from langchain.llms import BaseLLM
+from langchain.chat_models import ChatOpenAI
+from langchain.prompts import PromptTemplate
+from langchain.chains import LLMChain
+
+import nbformat
+
+from jupyter_ai.models import AgentChatMessage, HumanChatMessage
+from jupyter_ai.actors.base import BaseActor, Logger
+from jupyter_ai_magics.providers import ChatOpenAINewProvider
+
+# JSON schema the outline LLM response must validate against.
+schema = """{
+    "$schema": "http://json-schema.org/draft-07/schema#",
+    "type": "object",
+    "properties": {
+        "description": {
+            "type": "string"
+        },
+        "sections": {
+            "type": "array",
+            "items": {
+                "type": "object",
+                "properties": {
+                    "title": {
+                        "type": "string"
+                    },
+                    "content": {
+                        "type": "string"
+                    }
+                },
+                "required": ["title", "content"]
+            }
+        }
+    },
+    "required": ["sections"]
+}"""
+
+class NotebookOutlineChain(LLMChain):
+    """Chain to generate a notebook outline, with section titles and descriptions."""
+
+    @classmethod
+    def from_llm(cls, llm: BaseLLM, verbose: bool=False) -> LLMChain:
+        task_creation_template = (
+            "You are an AI that creates a detailed content outline for a Jupyter notebook on a given topic.\n"
+            "Generate the outline as JSON data that will validate against this JSON schema:\n"
+            "{schema}\n"
+            "Here is a description of the notebook you will create an outline for: {description}\n"
+            "Don't include an introduction or conclusion section in the outline, focus only on sections that will need code."
+        )
+        prompt = PromptTemplate(
+            template=task_creation_template,
+            input_variables=[
+                "description",
+                "schema"
+            ],
+        )
+        return cls(prompt=prompt, llm=llm, verbose=verbose)
+
+def generate_outline(description, llm=None, verbose=False):
+    """Generate an outline of sections given a description of a notebook.
+
+    Returns the parsed JSON outline; raises json.JSONDecodeError if the
+    LLM output is not valid JSON.
+    """
+    if llm is None:
+        llm = ChatOpenAINewProvider(model_id='gpt-3.5-turbo')
+    chain = NotebookOutlineChain.from_llm(llm=llm, verbose=verbose)
+    outline = chain.predict(description=description, schema=schema)
+    return json.loads(outline)
+
+class CodeImproverChain(LLMChain):
+    """Chain to improve source code."""
+
+    @classmethod
+    def from_llm(cls, llm: BaseLLM, verbose: bool=False) -> LLMChain:
+        task_creation_template = (
+            "Improve the following code and make sure it is valid. Make sure to return the improved code only - don't give an explanation of the improvements.\n"
+            "{code}"
+        )
+        prompt = PromptTemplate(
+            template=task_creation_template,
+            input_variables=[
+                "code",
+            ],
+        )
+        return cls(prompt=prompt, llm=llm, verbose=verbose)
+
+def improve_code(code, llm=None, verbose=False):
+    """Improve source code using an LLM, stripping any Markdown code fences."""
+    chain = CodeImproverChain.from_llm(llm=llm, verbose=verbose)
+    improved_code = chain.predict(code=code)
+    # Split on '\n' (newline). The original split on the literal two
+    # characters '/n', which never matched, so ``` fences from the LLM
+    # response were left inside the generated code cells.
+    improved_code = '\n'.join([line for line in improved_code.split('\n') if not line.startswith("```")])
+    return improved_code
+
+class NotebookSectionCodeChain(LLMChain):
+    """Chain to generate source code for a notebook section."""
+
+    @classmethod
+    def from_llm(cls, llm: BaseLLM, verbose: bool=False) -> LLMChain:
+        task_creation_template = (
+            "You are an AI that writes code for a single section of a Jupyter notebook.\n"
+            "Overall topic of the notebook: {description}\n"
+            "Title of the notebook section: {title}\n"
+            "Description of the notebook section: {content}\n"
+            "Given this information, write all the code for this section and this section only."
+            " Your output should be valid code with inline comments.\n"
+            "Code in the notebook so far:\n"
+            "{code_so_far}"
+        )
+        prompt = PromptTemplate(
+            template=task_creation_template,
+            input_variables=[
+                "description",
+                "title",
+                "content",
+                "code_so_far"
+            ],
+        )
+        return cls(prompt=prompt, llm=llm, verbose=verbose)
+
+def generate_code(outline, llm=None, verbose=False):
+    """Generate source code for each section in the outline, in place.
+
+    Earlier sections' code is fed back into later prompts via code_so_far
+    so the notebook builds on itself.
+    """
+    if llm is None:
+        llm = ChatOpenAINewProvider(model_id='gpt-3.5-turbo')
+    chain = NotebookSectionCodeChain.from_llm(llm=llm, verbose=verbose)
+    code_so_far = []
+    for section in outline['sections']:
+        code = chain.predict(
+            description=outline['description'],
+            title=section['title'],
+            content=section['content'],
+            code_so_far='\n'.join(code_so_far)
+        )
+        section['code'] = improve_code(code, llm=llm, verbose=verbose)
+        code_so_far.append(section['code'])
+    return outline
+
+class NotebookSummaryChain(LLMChain):
+    """Chain to generate a short summary of a notebook."""
+
+    @classmethod
+    def from_llm(cls, llm: BaseLLM, verbose: bool=False) -> LLMChain:
+        task_creation_template = (
+            "Create a markdown summary for a Jupyter notebook with the following content."
+            " The summary should consist of a single paragraph.\n"
+            "Content:\n{content}"
+        )
+        prompt = PromptTemplate(
+            template=task_creation_template,
+            input_variables=[
+                "content",
+            ],
+        )
+        return cls(prompt=prompt, llm=llm, verbose=verbose)
+
+class NotebookTitleChain(LLMChain):
+    """Chain to generate the title of a notebook."""
+
+    @classmethod
+    def from_llm(cls, llm: BaseLLM, verbose: bool=False) -> LLMChain:
+        task_creation_template = (
+            "Create a short, few word, descriptive title for a Jupyter notebook with the following content.\n"
+            "Content:\n{content}"
+        )
+        prompt = PromptTemplate(
+            template=task_creation_template,
+            input_variables=[
+                "content",
+            ],
+        )
+        return cls(prompt=prompt, llm=llm, verbose=verbose)
+
+def generate_title_and_summary(outline, llm=None, verbose=False):
+    """Generate a title and summary of a notebook outline using an LLM."""
+    if llm is None:
+        llm = ChatOpenAINewProvider(model_id='gpt-3.5-turbo')
+    summary_chain = NotebookSummaryChain.from_llm(llm=llm, verbose=verbose)
+    title_chain = NotebookTitleChain.from_llm(llm=llm, verbose=verbose)
+    summary = summary_chain.predict(content=outline)
+    title = title_chain.predict(content=outline)
+    outline['summary'] = summary
+    # LLMs often wrap titles in quotes; strip them before use.
+    outline['title'] = title.strip('"')
+    return outline
+
+def create_notebook(outline):
+    """Create an nbformat Notebook object for a notebook outline."""
+    nbf = nbformat.v4
+    nb = nbf.new_notebook()
+    nb['cells'].append(nbf.new_markdown_cell('# ' + outline['title']))
+    nb['cells'].append(nbf.new_markdown_cell('## Introduction'))
+    disclaimer = f"This notebook was created by [Jupyter AI](https://github.com/jupyterlab/jupyter-ai) with the following prompt:\n\n> {outline['prompt']}"
+    nb['cells'].append(nbf.new_markdown_cell(disclaimer))
+    nb['cells'].append(nbf.new_markdown_cell(outline['summary']))
+
+    # Include every generated section. The original iterated
+    # outline['sections'][1:], silently dropping the first section's code
+    # even though it was generated.
+    for section in outline['sections']:
+        nb['cells'].append(nbf.new_markdown_cell('## ' + section['title']))
+        for code_block in section['code'].split('\n\n'):
+            nb['cells'].append(nbf.new_code_cell(code_block))
+    return nb
+
+@ray.remote
+class GenerateActor(BaseActor):
+    """A Ray actor to generate a Jupyter notebook given a description."""
+
+    def __init__(self, reply_queue: Queue, root_dir: str, log: Logger):
+        super().__init__(log=log, reply_queue=reply_queue)
+        # Resolve the server root once so all notebooks land under it.
+        self.root_dir = os.path.abspath(os.path.expanduser(root_dir))
+        self.llm = ChatOpenAINewProvider(model_id='gpt-3.5-turbo')
+
+    def _process_message(self, message: HumanChatMessage):
+        # Acknowledge immediately; generation takes minutes.
+        response = "👍 Great, I will get started on your notebook. It may take a few minutes, but I will reply here when the notebook is ready. In the meantime, you can continue to ask me other questions."
+        self.reply(response, message)
+
+        prompt = message.body
+        outline = generate_outline(prompt, llm=self.llm, verbose=True)
+        # Save the user input prompt, the description property is now LLM generated.
+        outline['prompt'] = prompt
+        outline = generate_code(outline, llm=self.llm, verbose=True)
+        outline = generate_title_and_summary(outline, llm=self.llm)
+        notebook = create_notebook(outline)
+        # The title is LLM-generated (untrusted). Replace path separators so
+        # the notebook cannot be written outside root_dir or fail on open.
+        safe_title = outline['title'].replace(os.sep, '_').replace('/', '_')
+        final_path = os.path.join(self.root_dir, safe_title + '.ipynb')
+        nbformat.write(notebook, final_path)
+        response = f"""🎉 I have created your notebook and saved it to the location {final_path}. I am still learning how to create notebooks, so please review all code before running it."""
+        self.reply(response, message)
+
+
+# TODO(review): add error handling — if any chain raises (bad JSON outline,
+# API failure), reply to the user with a failure message instead of dying
+# silently inside the actor.
diff --git a/packages/jupyter-ai/jupyter_ai/extension.py b/packages/jupyter-ai/jupyter_ai/extension.py
index a72988636..e4b22a1f5 100644
--- a/packages/jupyter-ai/jupyter_ai/extension.py
+++ b/packages/jupyter-ai/jupyter_ai/extension.py
@@ -7,6 +7,7 @@
 from jupyter_ai.actors.learn import LearnActor
 from jupyter_ai.actors.router import Router
 from jupyter_ai.actors.memory import MemoryActor
+from jupyter_ai.actors.generate import GenerateActor
 from jupyter_ai.actors.base import ACTOR_TYPE
 from jupyter_ai.reply_processor import ReplyProcessor
 from jupyter_server.extension.application import ExtensionApp
@@ -133,11 +134,18 @@ def initialize_settings(self):
             log=self.log,
             memory=ConversationBufferWindowMemory(return_messages=True, k=2)
         )
+        generate_actor = GenerateActor.options(name=ACTOR_TYPE.GENERATE.value).remote(
+            reply_queue=reply_queue,
+            log=self.log,
+            root_dir=self.settings['server_root_dir']
+        )
+
         self.settings['router'] = router
         self.settings["default_actor"] = default_actor
         self.settings["learn_actor"] = learn_actor
         self.settings["ask_actor"] = ask_actor
         self.settings["memory_actor"] = memory_actor
+        self.settings["generate_actor"] = generate_actor
 
         reply_processor = ReplyProcessor(self.settings['chat_handlers'], reply_queue, log=self.log)
         loop = asyncio.get_event_loop()