From d25250a89735187ed124308eb83ee596e3f4d96e Mon Sep 17 00:00:00 2001 From: Balaji Veeramani Date: Mon, 10 Jul 2023 15:14:00 -0500 Subject: [PATCH] [AIR][Docs] Remove `BatchPredictor` from examples (#37178) The following examples already use updated APIs: * Stable Diffusion Batch Prediction with Ray AIR * GPT-J-6B Batch Prediction with Ray AIR (LLM) The following examples have been updated to use updated APIs: * Training a model with distributed XGBoost * Training a model with distributed LightGBM I've removed batch prediction sections from the other examples, and, where appropriate, linked to the batch inference user guide. Signed-off-by: Balaji Veeramani --- doc/source/_toc.yml | 1 - .../ray-air/doc_code/pytorch_starter.py | 14 - doc/source/ray-air/doc_code/tf_starter.py | 21 - doc/source/ray-air/examples/BUILD | 13 +- ...ert_existing_pytorch_code_to_ray_air.ipynb | 386 +------- .../convert_existing_tf_code_to_ray_air.ipynb | 260 +---- .../examples/gptj_deepspeed_fine_tuning.ipynb | 2 +- .../huggingface_text_classification.ipynb | 127 +-- doc/source/ray-air/examples/index.rst | 1 - .../ray-air/examples/lightgbm_example.ipynb | 450 +++++---- .../opt_deepspeed_batch_inference.ipynb | 911 ------------------ .../pytorch_tabular_batch_prediction.py | 46 - .../examples/pytorch_tabular_starter.py | 19 - .../ray-air/examples/sklearn_example.ipynb | 106 +- .../examples/tf_tabular_batch_prediction.py | 49 - .../ray-air/examples/tf_tabular_starter.py | 21 - .../ray-air/examples/torch_detection.ipynb | 149 +-- .../examples/torch_image_batch_pretrained.py | 30 - .../examples/torch_incremental_learning.ipynb | 416 +------- .../examples/xgboost_batch_prediction.py | 42 - .../ray-air/examples/xgboost_example.ipynb | 433 ++++----- .../ray-air/examples/xgboost_starter.py | 18 - doc/source/ray-air/getting-started.rst | 28 +- doc/source/ray-air/predictors.rst | 45 - doc/source/ray-overview/examples.rst | 7 - doc/source/ray-overview/getting-started.md | 9 +- .../lightning/lightning_cola_advanced.ipynb | 126 +-- ...ingface_basic_language_modeling_example.py | 20 +- .../pytorch/torch_regression_example.py | 21 +- .../tf/tensorflow_autoencoder_example.py | 63 +- .../tf/tensorflow_regression_example.py | 29 +- release/release_tests.yaml | 57 +- 32 files changed, 584 insertions(+), 3336 deletions(-) delete mode 100644 doc/source/ray-air/examples/opt_deepspeed_batch_inference.ipynb delete mode 100644 doc/source/ray-air/examples/pytorch_tabular_batch_prediction.py delete mode 100644 doc/source/ray-air/examples/tf_tabular_batch_prediction.py delete mode 100644 doc/source/ray-air/examples/torch_image_batch_pretrained.py delete mode 100644 doc/source/ray-air/examples/xgboost_batch_prediction.py diff --git a/doc/source/_toc.yml b/doc/source/_toc.yml index 99ff9dc9ee4c..4f4cfd850e9c 100644 --- a/doc/source/_toc.yml +++ b/doc/source/_toc.yml @@ -57,7 +57,6 @@ parts: - file: ray-air/deployment - file: ray-air/examples/index sections: - - file: ray-air/examples/opt_deepspeed_batch_inference - file: ray-air/examples/torch_image_example - file: ray-air/examples/torch_detection - file: ray-air/examples/convert_existing_pytorch_code_to_ray_air diff --git a/doc/source/ray-air/doc_code/pytorch_starter.py b/doc/source/ray-air/doc_code/pytorch_starter.py index edd711923515..f6f93e3459e6 100644 --- a/doc/source/ray-air/doc_code/pytorch_starter.py +++ b/doc/source/ray-air/doc_code/pytorch_starter.py @@ -129,17 +129,3 @@ def train_func(config): result = trainer.fit() print(f"Last result: {result.metrics}") # __air_pytorch_train_end__ - - -# # __air_pytorch_batchpred_start__ -# import random -# from ray.train.batch_predictor import BatchPredictor -# from ray.train.torch import TorchPredictor - -# batch_predictor = BatchPredictor.from_checkpoint(result.checkpoint, TorchPredictor) - -# items = [{"x": random.uniform(0, 1) for _ in range(10)}] -# prediction_dataset = ray.data.from_items(items) - -# predictions = batch_predictor.predict(prediction_dataset, dtype=torch.float) -# # __air_pytorch_batchpred_end__ diff --git a/doc/source/ray-air/doc_code/tf_starter.py b/doc/source/ray-air/doc_code/tf_starter.py index 5ea80d19ad90..acd78dc49288 100644 --- a/doc/source/ray-air/doc_code/tf_starter.py +++ b/doc/source/ray-air/doc_code/tf_starter.py @@ -75,24 +75,3 @@ def train_func(config: dict): result = trainer.fit() print(result.metrics) # __air_tf_train_end__ - -# __air_tf_batchpred_start__ -import numpy as np - -from ray.train.batch_predictor import BatchPredictor -from ray.train.tensorflow import TensorflowPredictor - - -batch_predictor = BatchPredictor.from_checkpoint( - result.checkpoint, TensorflowPredictor, model_definition=build_model -) - -items = [{"x": np.random.uniform(0, 1)} for _ in range(10)] -prediction_dataset = ray.data.from_items(items) - -predictions = batch_predictor.predict(prediction_dataset, dtype=tf.float32) - -print("PREDICTIONS") -predictions.show() - -# __air_tf_batchpred_end__ diff --git a/doc/source/ray-air/examples/BUILD b/doc/source/ray-air/examples/BUILD index ecbfa5b1a41d..9e0c1905a587 100644 --- a/doc/source/ray-air/examples/BUILD +++ b/doc/source/ray-air/examples/BUILD @@ -15,22 +15,12 @@ filegroup( py_test_run_all_subdirectory( size = "medium", include = ["*.py"], - exclude = ["torch_image_batch_pretrained.py"], - extra_srcs = [], - data = ["//doc/source/ray-air/examples:air_examples"], - tags = ["exclusive", "team:ml", "ray_air"], -) - -py_test_run_all_subdirectory( - size = "medium", - include = ["torch_image_batch_pretrained.py"], exclude = [], extra_srcs = [], data = ["//doc/source/ray-air/examples:air_examples"], - tags = ["exclusive", "team:ml", "ray_air", "gpu"], + tags = ["exclusive", "team:ml", "ray_air"], ) - # -------------------------------------------------------------------- # Test all doc/source/ray-air/examples notebooks. # -------------------------------------------------------------------- @@ -50,7 +40,6 @@ py_test_run_all_notebooks( "gptj_serving.ipynb", # Requires GPUs "stablediffusion_batch_prediction.ipynb", # Requires GPUs "gptj_deepspeed_fine_tuning.ipynb", # Requires release test - "opt_deepspeed_batch_inference.ipynb", # Requires release test "dolly_lightning_fsdp_finetuning.ipynb", # Requires release test ], data = ["//doc/source/ray-air/examples:air_examples"], diff --git a/doc/source/ray-air/examples/convert_existing_pytorch_code_to_ray_air.ipynb b/doc/source/ray-air/examples/convert_existing_pytorch_code_to_ray_air.ipynb index d56c70a745cb..08cffc200169 100644 --- a/doc/source/ray-air/examples/convert_existing_pytorch_code_to_ray_air.ipynb +++ b/doc/source/ray-air/examples/convert_existing_pytorch_code_to_ray_air.ipynb @@ -18,13 +18,13 @@ "- Automatic checkpointing/fault tolerance and result tracking\n", "- Parallel data preprocessing\n", "- Seamless integration with hyperparameter tuning\n", - "- Scalable batch prediction\n", "- Scalable model serving\n", "\n", - "This tutorial will show you how to start with Ray AIR from your existing PyTorch training code. We will learn how to **distribute your training** and do **scalable batch prediction**.\n" + "This tutorial will show you how to start with Ray AIR from your existing PyTorch training code and learn how to **distribute your training**.\n" ] }, { + "attachments": {}, "cell_type": "markdown", "id": "9a4855cf", "metadata": {}, @@ -37,6 +37,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "a42faedb", "metadata": {}, @@ -97,6 +98,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "9795c146", "metadata": {}, @@ -119,6 +121,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "ae11399e", "metadata": {}, @@ -178,6 +181,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "b692d06a", "metadata": {}, @@ -197,6 +201,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "681d5798", "metadata": {}, @@ -232,6 +237,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "6621cffa", "metadata": {}, @@ -264,6 +270,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "d915d788", "metadata": {}, @@ -389,6 +396,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "6655d903", "metadata": {}, @@ -397,6 +405,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "d0b98b1c", "metadata": {}, @@ -442,6 +451,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "60f7341a", "metadata": {}, @@ -554,6 +564,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "b3df2581", "metadata": {}, @@ -672,6 +683,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "6e260f44", "metadata": {}, @@ -704,6 +716,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "cf280e6a", "metadata": {}, @@ -750,6 +763,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "0fc52cc7", "metadata": {}, @@ -778,6 +792,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "341f4fd8", "metadata": {}, @@ -788,6 +803,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "3bbe06f3", "metadata": {}, @@ -875,6 +891,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "d2af219d", "metadata": {}, @@ -898,6 +915,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "534ed4df", "metadata": {}, @@ -926,367 +944,7 @@ ] }, { - "cell_type": "markdown", - "id": "b6b15d88", - "metadata": {}, - "source": [ - "## Loading the model for prediction\n", - "You may have noticed that we skipped one part of the original tutorial - loading the model and using it for inference. The original code looks like this (we've wrapped it in a function):" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "id": "68e664ff", - "metadata": {}, - "outputs": [], - "source": [ - "def predict_from_model(model):\n", - " classes = [\n", - " \"T-shirt/top\",\n", - " \"Trouser\",\n", - " \"Pullover\",\n", - " \"Dress\",\n", - " \"Coat\",\n", - " \"Sandal\",\n", - " \"Shirt\",\n", - " \"Sneaker\",\n", - " \"Bag\",\n", - " \"Ankle boot\",\n", - " ]\n", - "\n", - " model.eval()\n", - " x, y = test_data[0][0], test_data[0][1]\n", - " with torch.no_grad():\n", - " pred = model(x)\n", - " predicted, actual = classes[pred[0].argmax(0)], classes[y]\n", - " print(f'Predicted: \"{predicted}\", Actual: \"{actual}\"')\n" - ] - }, - { - "cell_type": "markdown", - "id": "1abf022a", - "metadata": {}, - "source": [ - "We can use our saved model with the existing code to do prediction:" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "id": "0c135a17", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Predicted: \"Ankle boot\", Actual: \"Ankle boot\"\n" - ] - } - ], - "source": [ - "from ray.train.torch import TorchCheckpoint\n", - "\n", - "model = TorchCheckpoint.from_checkpoint(result.checkpoint).get_model(NeuralNetwork())\n", - "\n", - "predict_from_model(model)" - ] - }, - { - "cell_type": "markdown", - "id": "f6fc1441", - "metadata": {}, - "source": [ - "To predict more than one example, we can use a loop:" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "id": "17652fa1", - "metadata": {}, - "outputs": [], - "source": [ - "classes = [\n", - " \"T-shirt/top\",\n", - " \"Trouser\",\n", - " \"Pullover\",\n", - " \"Dress\",\n", - " \"Coat\",\n", - " \"Sandal\",\n", - " \"Shirt\",\n", - " \"Sneaker\",\n", - " \"Bag\",\n", - " \"Ankle boot\",\n", - "]\n", - "\n", - "def predict_from_model(model, data):\n", - " model.eval()\n", - " with torch.no_grad():\n", - " for x, y in data:\n", - " pred = model(x)\n", - " predicted, actual = classes[pred[0].argmax(0)], classes[y]\n", - " print(f'Predicted: \"{predicted}\", Actual: \"{actual}\"')\n" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "id": "3bc14ed6", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Predicted: \"Ankle boot\", Actual: \"Ankle boot\"\n", - "Predicted: \"Pullover\", Actual: \"Pullover\"\n", - "Predicted: \"Trouser\", Actual: \"Trouser\"\n", - "Predicted: \"Trouser\", Actual: \"Trouser\"\n", - "Predicted: \"Pullover\", Actual: \"Shirt\"\n", - "Predicted: \"Trouser\", Actual: \"Trouser\"\n", - "Predicted: \"Coat\", Actual: \"Coat\"\n", - "Predicted: \"Pullover\", Actual: \"Shirt\"\n", - "Predicted: \"Sneaker\", Actual: \"Sandal\"\n", - "Predicted: \"Sneaker\", Actual: \"Sneaker\"\n" - ] - } - ], - "source": [ - "predict_from_model(model, [test_data[i] for i in range(10)])" - ] - }, - { - "cell_type": "markdown", - "id": "a0ce0733", - "metadata": {}, - "source": [ - "## Using Ray AIR for scalable batch prediction\n", - "However, we can also use Ray AIRs `BatchPredictor` class to do scalable prediction." - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "id": "4d8b0f50", - "metadata": {}, - "outputs": [], - "source": [ - "from ray.train.batch_predictor import BatchPredictor\n", - "from ray.train.torch import TorchPredictor\n", - "\n", - "batch_predictor = BatchPredictor.from_checkpoint(result.checkpoint, TorchPredictor, model=NeuralNetwork())" - ] - }, - { - "cell_type": "markdown", - "id": "ad556eeb", - "metadata": {}, - "source": [ - "Batch predictors work with Ray Data. Here we convert our test dataset into a Dataset - note that this is not very efficient, and you can look at our {ref}`other tutorials ` to see more efficient ways to generate a Dataset." - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "id": "8cb0556f", - "metadata": {}, - "outputs": [], - "source": [ - "import ray.data\n", - "\n", - "ds = ray.data.from_items([x.numpy() for x, y in test_data], parallelism=8)" - ] - }, - { - "cell_type": "markdown", - "id": "264dd2e4", - "metadata": {}, - "source": [ - "We can then trigger prediction with two workers:" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "id": "8a823f7a", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Map Progress (2 actors 1 pending): 100%|██████████| 8/8 [00:02<00:00, 70.01it/s]\n" - ] - } - ], - "source": [ - "results = batch_predictor.predict(ds, batch_size=32, min_scoring_workers=2)" - ] - }, - { - "cell_type": "markdown", - "id": "41094a55", - "metadata": {}, - "source": [ - "`results` is another Dataset. We can use `results.show()` to see our prediction results:" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "id": "d3dce40d", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'predictions': array([-1.6813023 , -1.80252 , -0.7062941 , -1.311813 , -0.73570144,\n", - " 1.5710734 , -0.7933277 , 2.0013504 , 1.3654878 , 2.3410547 ],\n", - " dtype=float32)}\n", - "{'predictions': array([ 0.7655406 , -2.3314183 , 2.7599745 , -0.9481916 , 2.381936 ,\n", - " -1.7827132 , 1.9278868 , -3.1977224 , 0.99582016, -1.4932251 ],\n", - " dtype=float32)}\n", - "{'predictions': array([ 1.3619348 , 3.6063552 , -0.31104898, 2.543014 , 0.35176522,\n", - " -2.2156405 , 0.33978355, -2.346588 , -1.7794112 , -2.3220763 ],\n", - " dtype=float32)}\n", - "{'predictions': array([ 1.0049653 , 2.828181 , -0.29254377, 2.0342605 , 0.12778719,\n", - " -1.6141529 , 0.17694427, -1.7565594 , -1.4074212 , -1.6818824 ],\n", - " dtype=float32)}\n", - "{'predictions': array([ 0.7404187 , -1.0129585 , 1.0854365 , -0.20976087, 1.0174558 ,\n", - " -0.9567458 , 1.0075954 , -1.7656276 , 0.42417505, -0.82513285],\n", - " dtype=float32)}\n", - "{'predictions': array([ 1.4985809 , 2.499547 , 0.12339873, 1.9594493 , 0.717446 ,\n", - " -2.0457497 , 0.6526047 , -2.4334526 , -1.4454234 , -2.2310004 ],\n", - " dtype=float32)}\n", - "{'predictions': array([ 0.45234615, -0.23714153, 0.63517165, 0.04347774, 0.6996659 ,\n", - " -0.5516397 , 0.64028525, -1.0785 , 0.10881007, -0.9026278 ],\n", - " dtype=float32)}\n", - "{'predictions': array([ 0.38904738, -0.80522966, 1.1767559 , -0.21403429, 1.1468315 ,\n", - " -0.84129035, 0.95365965, -1.6148682 , 0.27161083, -0.96888554],\n", - " dtype=float32)}\n", - "{'predictions': array([-0.54510164, -0.31364274, -0.22182664, -0.25785953, -0.25741974,\n", - " 0.48500216, -0.2174497 , 0.7817588 , 0.34047806, 0.24852225],\n", - " dtype=float32)}\n", - "{'predictions': array([-1.2857382 , -0.9965143 , -0.64847904, -0.7487341 , -0.60564923,\n", - " 1.1155919 , -0.59477496, 2.0135763 , 0.88436544, 1.067797 ],\n", - " dtype=float32)}\n", - "{'predictions': array([ 0.43335694, -0.8999133 , 1.7488041 , -0.31407052, 1.6201458 ,\n", - " -1.2921515 , 1.2184532 , -2.068122 , 0.17047453, -1.2746251 ],\n", - " dtype=float32)}\n", - "{'predictions': array([-1.267686 , -1.2830508 , -0.4776874 , -0.94430155, -0.51243144,\n", - " 1.167536 , -0.48850274, 1.4446495 , 1.00295 , 1.4936616 ],\n", - " dtype=float32)}\n", - "{'predictions': array([-1.313108 , -1.2630323 , -0.4338272 , -0.9408438 , -0.42691046,\n", - " 1.0805027 , -0.47953707, 1.6175348 , 1.2289674 , 0.99234164],\n", - " dtype=float32)}\n", - "{'predictions': array([ 0.8392371 , 2.005179 , -0.51027215, 2.2383528 , 0.11543664,\n", - " -1.418318 , 0.10795547, -1.5231588 , -0.9388958 , -1.2481594 ],\n", - " dtype=float32)}\n", - "{'predictions': array([ 0.9623028 , -1.5077128 , 1.9832315 , -0.06346714, 2.3645868 ,\n", - " -2.1186042 , 1.7628006 , -3.423348 , 0.84258574, -1.9048262 ],\n", - " dtype=float32)}\n", - "{'predictions': array([ 1.1201253 , 2.6718287 , -0.22753508, 2.1176536 , 0.23477581,\n", - " -1.691438 , 0.2711372 , -1.9383426 , -1.3917452 , -1.7475704 ],\n", - " dtype=float32)}\n", - "{'predictions': array([ 0.70388055, -0.6840804 , 1.1767206 , -0.21303988, 0.96372414,\n", - " -0.94062155, 0.92242914, -1.689395 , 0.23195787, -1.00324 ],\n", - " dtype=float32)}\n", - "{'predictions': array([ 0.78447473, -1.2020342 , 1.51774 , -0.36963996, 1.368768 ,\n", - " -1.3143553 , 1.2229909 , -2.284686 , 0.6896354 , -1.0750523 ],\n", - " dtype=float32)}\n", - "{'predictions': array([-1.5986964 , -2.6742263 , -0.04187664, -1.7070676 , -0.00644506,\n", - " 1.1022365 , -0.31155828, 1.3389733 , 2.226508 , 1.72136 ],\n", - " dtype=float32)}\n", - "{'predictions': array([ 2.9001627 , 0.791762 , 1.148489 , 1.6756771 , 1.5494249 ,\n", - " -2.8295102 , 1.7419 , -4.0650196 , -0.98189455, -2.9981184 ],\n", - " dtype=float32)}\n" - ] - } - ], - "source": [ - "results.show()" - ] - }, - { - "cell_type": "markdown", - "id": "427b68e8", - "metadata": {}, - "source": [ - "If we want to convert these predictions into class names (as in the original example), we can use a `map` function to do this:" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "id": "f17b5c10", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Map_Batches: 100%|██████████| 8/8 [00:02<00:00, 80.05it/s]\n" - ] - } - ], - "source": [ - "predicted_classes = results.map_batches(\n", - " lambda batch: {\"pred\": [classes[pred.argmax(0)] for pred in batch[\"predictions\"]]}, \n", - " batch_size=32,\n", - " batch_format=\"pandas\")" - ] - }, - { - "cell_type": "markdown", - "id": "cb7040db", - "metadata": {}, - "source": [ - "To see how well our prediction did, let's zip the predicted labels together with some of the actual labels to compare them:" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "id": "207e13b9", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "('Ankle boot', 'Ankle boot')\n", - "('Pullover', 'Pullover')\n", - "('Trouser', 'Trouser')\n", - "('Trouser', 'Trouser')\n", - "('Pullover', 'Shirt')\n", - "('Trouser', 'Trouser')\n", - "('Coat', 'Coat')\n", - "('Pullover', 'Shirt')\n", - "('Sneaker', 'Sandal')\n", - "('Sneaker', 'Sneaker')\n", - "('Pullover', 'Coat')\n", - "('Ankle boot', 'Sandal')\n", - "('Sneaker', 'Sneaker')\n", - "('Dress', 'Dress')\n", - "('Coat', 'Coat')\n", - "('Trouser', 'Trouser')\n", - "('Pullover', 'Pullover')\n", - "('Pullover', 'Coat')\n", - "('Bag', 'Bag')\n", - "('T-shirt/top', 'T-shirt/top')\n" - ] - } - ], - "source": [ - "real_classes = [classes[y] for x, y in test_data]\n", - "for predicted, real in zip(predicted_classes.take_batch()[\"pred\"], real_classes):\n", - " print((predicted, real))" - ] - }, - { + "attachments": {}, "cell_type": "markdown", "id": "2963e1f7", "metadata": {}, @@ -1300,7 +958,7 @@ "- save and retrieve model checkpoints via Ray AIR\n", "- load a model for batch prediction\n", "\n", - "In our {ref}`other examples ` you can learn how to do more things with the Ray AIR API, such as **serving your model with Ray Serve** or **tune your hyperparameters with Ray Tune.** You can also learn how to **construct Ray Data** to leverage Ray AIR's **preprocessing** API.\n", + "In our {ref}`other examples ` you can learn how to do more things with the Ray AIR API, such as **serving your model with Ray Serve** or **tune your hyperparameters with Ray Tune.** You can also learn how to perform {ref}`offline batch inference ` with Ray Data.\n", "\n", "We hope this tutorial gave you a good starting point to leverage Ray AIR. If you have any questions, suggestions, or run into any problems pelase reach out on [Discuss](https://discuss.ray.io/) or [GitHub](https://github.com/ray-project/ray)!" ] diff --git a/doc/source/ray-air/examples/convert_existing_tf_code_to_ray_air.ipynb b/doc/source/ray-air/examples/convert_existing_tf_code_to_ray_air.ipynb index 400e4cfa090a..c4a16461e0d6 100644 --- a/doc/source/ray-air/examples/convert_existing_tf_code_to_ray_air.ipynb +++ b/doc/source/ray-air/examples/convert_existing_tf_code_to_ray_air.ipynb @@ -18,13 +18,13 @@ "- Automatic checkpointing/fault tolerance and result tracking\n", "- Parallel data preprocessing\n", "- Seamless integration with hyperparameter tuning\n", - "- Scalable batch prediction\n", "- Scalable model serving\n", "\n", - "This tutorial will show you how to start with Ray AIR from your existing Tensorflow training code. We will learn how to perform **distributed data-parallel training** and do **scalable batch prediction**." + "This tutorial will show you how to start with Ray AIR from your existing Tensorflow training code. We will learn how to perform **distributed data-parallel training**." ] }, { + "attachments": {}, "cell_type": "markdown", "id": "2b4c7109", "metadata": {}, @@ -37,6 +37,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "de8dfe91", "metadata": {}, @@ -66,6 +67,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "42445269", "metadata": {}, @@ -105,6 +107,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "f898c4b1", "metadata": {}, @@ -132,6 +135,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "58ceff67", "metadata": {}, @@ -160,6 +164,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "310c5f98", "metadata": {}, @@ -195,6 +200,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "0e70160c", "metadata": {}, @@ -229,6 +235,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "2d741835", "metadata": {}, @@ -281,6 +288,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "d5ce3827", "metadata": {}, @@ -293,6 +301,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "4b122ade", "metadata": {}, @@ -448,6 +457,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "1d8630f9-ed38-4b36-8dd5-1e1acc63c66e", "metadata": {}, @@ -458,6 +468,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "c2949ebd", "metadata": {}, @@ -490,6 +501,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "6774b3dd", "metadata": {}, @@ -498,6 +510,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "cfdda09f", "metadata": {}, @@ -539,6 +552,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "dbf8d1ac", "metadata": {}, @@ -547,6 +561,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "8b461680", "metadata": {}, @@ -586,236 +601,7 @@ ] }, { - "cell_type": "markdown", - "id": "68aa1557", - "metadata": {}, - "source": [ - "## Using the trained model for prediction\n", - "\n", - "The original Tensorflow quickstart tutorial doesn't actually go over loading the model for prediction, but let's see how you would complete the machine learning lifecycle with scalable batch prediction with Ray AIR!" - ] - }, - { - "cell_type": "markdown", - "id": "e8841935", - "metadata": {}, - "source": [ - "### Loading the model for prediction\n", - "\n", - "Here's how to load an [AIR Predictor](air-predictors) `TensorflowPredictor` from a checkpoint and perform model inference on some sample data." - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "id": "7ec884ee", - "metadata": {}, - "outputs": [], - "source": [ - "from ray.train.tensorflow import TensorflowCheckpoint, TensorflowPredictor\n", - "\n", - "tf_checkpoint: TensorflowCheckpoint = result.checkpoint\n", - "predictor = TensorflowPredictor.from_checkpoint(\n", - " tf_checkpoint, model_definition=build_model\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "id": "c3890dfe", - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAOQAAADcCAYAAABgfg1NAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjYuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/av/WaAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAaoUlEQVR4nO3de1RTV74H8G8CSXgHQUhCBcTWV6XilApSH4NKiWJ9t9bazkLrVFuDz5nrUtdUq3WVXtur+ECnzmphpgPioFdZUmtHEfFR1ApYLz4YxotKiwRs5SEgr+z7h5fYeA6YQCAb+H3WOmvJL/uc7IN8s8/ZJyeRMMYYCCFckNq6A4SQxyiQhHCEAkkIRyiQhHCEAkkIRyiQhHCEAkkIRyiQhHCEAkkIR7pdIPv374/58+cbfz516hQkEglOnTplteeQSCT48MMPrbY9Ik4ikSAmJsZq27t16xYkEgkSExOtts2uZlEgExMTIZFIjIuDgwMGDRqEmJgY6PX6zupjpzh69GiPDt38+fNN/q+eXH766SeLt9ny4nfgwIFO6HH39GQmnlySkpIs2p59ezqxadMmBAQE4OHDhzh79iz27NmDo0ePIj8/H05OTu3ZZLuNGzcOdXV1kMvlFq139OhRxMfHi4ayrq4O9vbt+tVwY/HixYiIiDCpMcbw3nvvoX///njmmWds1LOeZdy4cfjqq68E9W3btuGHH37AxIkTLdpeu/7qJk+ejJdeegkA8Pvf/x6enp7YunUr0tLS8Oabb4quU1NTA2dn5/Y8XZukUikcHBysuk1rb88WwsLCEBYWZlI7e/Ysamtr8dZbb9moVz3PgAEDMGDAAJNaXV0dlixZggkTJkCtVlu0PaucQ06YMAEAUFRUBODR4ZKLiwtu3ryJqKgouLq6Gv8IDAYD4uLiMGzYMDg4OEClUmHx4sW4f/++yTYZY9i8eTP69esHJycnjB8/HlevXhU8d2vnkBcuXEBUVBT69OkDZ2dnDB8+HNu3bzf2Lz4+HgBMDi9aiJ1D5uXlYfLkyXBzc4OLiwsmTpyI8+fPm7RpOXw5d+4cVq1aBS8vLzg7O2PmzJkoLy+38LdqfcnJyZBIJJg3b16nPs9nn32Gl19+GZ6ennB0dERwcHCbh7lJSUkYPHgwHBwcEBwcjNOnTwva/PTTT3jnnXegUqmgUCgwbNgwfPnll525G+125MgRVFdXt+uFzyrHZTdv3gQAeHp6GmtNTU3QarUYM2YMPvvsM+Oh7OLFi5GYmIgFCxZg2bJlKCoqwq5du5CXl4dz585BJpMBANavX4/NmzcjKioKUVFRyM3NRWRkJBoaGp7an+PHj+PVV1+FRqPB8uXLoVarcf36daSnp2P58uVYvHgxSkpKcPz4cdHDjSddvXoVY8eOhZubG1avXg2ZTIbPP/8c4eHhyMrKQmhoqEn7pUuXok+fPtiwYQNu3bqFuLg4xMTEYP/+/W0+T319Paqrq5/aHwDo27evWe1aNDY24h//+Adefvll9O/f36J1LbV9+3ZMmzYNb731FhoaGpCSkoLXX38d6enpmDJliknbrKws7N+/H8uWLYNCocDu3bsxadIkXLx4EYGBgQAAvV6PUaNGGSeBvLy88M0332DhwoWoqqrCihUrLOqfwWDAL7/8YlZbpVJp/Js0V1JSEhwdHTFr1iyL1gMAMAskJCQwAOzEiROsvLycFRcXs5SUFObp6ckcHR3Zjz/+yBhjLDo6mgFga9asMVn/zJkzDABLSkoyqR87dsykXlZWxuRyOZsyZQozGAzGduvWrWMAWHR0tLGWmZnJALDMzEzGGGNNTU0sICCA+fv7s/v375s8z6+3pdPpWGu7D4Bt2LDB+POMGTOYXC5nN2/eNNZKSkqYq6srGzdunOD3ExERYfJcK1euZHZ2dqyiokL0+Z5c35zFUkeOHGEA2O7duy1et0XL7zo1NbXNdrW1tSY/NzQ0sMDAQDZhwgSTesu+XLp0yVi7ffs2c3BwYDNnzjTWFi5cyDQaDbt3757J+nPnzmVKpdL4fEVFRQwAS0hIaLN/Le3MWVr+rsz1888/M7lczubMmWPRei3aNUI+OVng7++PpKQkwUTB+++/b/JzamoqlEolXnnlFdy7d89YDw4OhouLCzIzMzFv3jycOHECDQ0NWLp0qcmh5IoVK/Dxxx+32be8vDwUFRVh27ZtcHd3N3ns19syV3NzM/75z39ixowZJucKGo0G8+bNw1/+8hdUVVXBzc3N+NiiRYtMnmvs2LHYtm0bbt++jeHDh7f6XFqtFsePH7e4j+ZITk6GTCbDnDlzOmX7v+bo6Gj89/3799Hc3IyxY8di3759grZhYWEIDg42/uzn54fp06fjyJEjaG5uhlQqxcGDBzFnzhwwxkz+brRaLVJSUpCbm4vRo0eb3T+1Wm327zkoKMjs7QLAgQMH0NDQ0O7z9HYFMj4+HoMGDYK9vT1UKhUGDx4MqdT0dNTe3h79+vUzqRUWFqKyshLe3t6i2y0rKwMA3L59GwAwcOBAk8e9vLzQp0+fNvvWcvjccrjTUeXl5aitrcXgwYMFjw0dOhQGgwHFxcUYNmyYse7n52fSrqXPT54nP0mj0UCj0Vih16YePHiAtLQ0aLVak9OKzpKeno7Nmzfj8uXLqK+vN9bFXhCf/D8GgEGDBqG2thbl5eWQSqWoqKjA3r17sXfvXtHna/m7MZeDg4NgULGWpKQkeHh4YPLkye1av12BDAkJMc6ytkahUAhCajAY4O3t3eq1GS8vr/Z0hzt2dnaidfaUT0upq6tDZWWlWc9hyezd4cOHu2x29cyZM5g2bRrGjRuH3bt3Q6PRQCaTISEhAcnJyRZvz2AwAADefvttREdHi7Zp66hDTHNzs9mTbB4eHmZfUrtz5w7OnDmDRYsWWXze2aJLL7Y9++yzOHHiBEaPHm1yWPMkf39/AI9G1F8fJpaXlz91lHn22WcBAPn5+W2+Cpp7+Orl5QUnJycUFBQIHrtx4wakUil8fX3N2tbT7N+/HwsWLDCr7dPC/WtJSUlwcXHBtGnT2ts1sx08eBAODg749ttvoVAojPWEhATR9oWFhYLav/71Lzg5ORlfoF1dXdHc3Gy1Ua24uBgBAQFmtc3MzER4eLhZbfft2wfGWIde+Lo0kHPmzMHu3bvx0UcfCc4Fm5qa8ODBA7i7uyMiIgIymQw7d+5EZGSkMTxxcXFPfY4XX3wRAQEBiIuLw/z5803OIxljxm21XBOtqKgQnGv+mp2dHSIjI5GWloZbt24ZZyj1ej2Sk5MxZswYk/PHjuiMc8jy8nKcOHECb775Zpe8acPOzg4SiQTNzc3G2q1bt3D48GHR9tnZ2cjNzcWLL74I4FFY0tLSMGnSJOORxuzZs5GcnIz8/HzBqUh5ebnFR1addQ6ZnJwMPz8/jBkzxqL+/FqXBvK3v/0tFi9ejNjYWFy+fBmRkZGQyWQoLCxEamoqtm/fjtdeew1eXl744x//iNjYWLz66quIiopCXl4evvnmm6dO90ulUuzZswdTp07FiBEjsGDBAmg0Gty4cQNXr17Ft99+CwDGiYRly5ZBq9XCzs4Oc+fOFd3m5s2bcfz4cYwZMwZLliyBvb09Pv/8c9TX12PLli1W+/10xjnk/v370dTU1Oar9ocffoiNGzeaPRocPHgQN27cENSjo6MxZcoUbN26FZMmTcK8efNQVlaG+Ph4PPfcc7hy5YpgncDAQGi1WpPLHgCwceNGY5tPPvkEmZmZCA0Nxbvvvovnn38ev/zyC3Jzc3HixAmzL2G06IxzyPz8fFy5cgVr1qxp1+ShkSVTsi3T8t9//32b7aKjo5mzs3Orj+/du5cFBwczR0dH5urqyl544QW2evVqVlJSYmzT3NzMNm7cyDQaDXN0dGTh4eEsPz+f+fv7t3nZo8XZs2fZK6+8wlxdXZmzszMbPnw427lzp/HxpqYmtnTpUubl5cUkEonJpQQ8cdmDMcZyc3OZVqtlLi4uzMnJiY0fP5599913Zv1+WutjVxg1ahTz9vZmTU1Nrbb5wx/+wCQSCbt+/Xqb22rZj9aWM2fOMMYY++KLL9jAgQOZQqFgQ4YMYQkJCWzDhg2CyzUAmE6nY3//+9+N7X/zm9+I/p70ej3T6XTM19eXyWQyplar2cSJE9nevXuNbcy97NEZ1qxZwwCwK1eudGg7Esboc1l7u5CQEPj7+yM1NdXWXen1KJC9XFVVFby8vHD58mUMHTrU1t3p9SiQhHCk292gTEhPRoEkhCMUSEI4QoEkhCOd9saA+Ph4fPrppygtLUVQUBB27tyJkJCQp65nMBhQUlICV1fXjl1gJaQNjDFUV1fDx8dH8J5rm+roBVExKSkpTC6Xsy+//JJdvXqVvfvuu8zd3Z3p9fqnrltcXGz2vWq00NLRpbi4uDMi0G6dctkjNDQUI0eOxK5duwA8GvV8fX2xdOlSrFmzps11Kysr4e7ujjGIgj3a9455Qp6mCY04i6OoqKiAUqm0dXeMrH7I2tDQgJycHKxdu9ZYk0qliIiIQHZ2tqB9fX29yT1zLR9hYQ8Z7CUUSNJJ/n8Y4u20yOoHz/fu3UNzczNUKpVJXaVSobS0VNA+NjYWSqXSuFjrViZCuiObn82uXbsWlZWVxqW4uNjWXSLEZqx+yNq3b1/Y2dkJPslcr9eL3uWuUChMbmQlpDez+ggpl8sRHByMjIwMY81gMCAjI0Pwwb2EEFOdch1y1apViI6OxksvvYSQkBDExcWhpqbG7I+nIKS36pRAvvHGGygvL8f69etRWlqKESNG4NixY4KJHkKIKe5uv6qqqoJSqUQ4ptNlD9JpmlgjTiENlZWVVvtMJGuw+SwrIeQxCiQhHKFAEsIRCiQhHKFAEsIRCiQhHKFAEsIRCiQhHKFAEsIRCiQhHKFAEsIRCiQhHKFAEsIRCiQhHKFAEsIRCiQhHKFAEsIRCiQhHKFAEsIRCiQhHKFAEsIRCiQhHKFAEsIRCiQhHKFAEsIRCiQhHKFAEsIRCiQhHOmUb7/qCX5+V/hdln6/+7do2xtlwm/1aqgX/6KgZ/YJ604/PhBta7h8ra0ukh6IRkhCOEKBJIQjFEhCOEKBJIQjFEhCOEKzrK1Y/R/Jgtps5/vijZ+1YMPhwtKtplrRptvLx1uwYdu7WOYvqDn/l1K0rX1GTmd3p1uiEZIQjlAgCeEIBZIQjlAgCeEITeq0Yse6uYLa+uHir199rjNB7f5QiWhb+fAKQW1L4H+Ltt2muSCofV3rItp2ipP42+/MVccaROsX6p0FtXCHRvGNiPT3uTcWizYdlGF+33oTGiEJ4QgFkhCOUCAJ4QgFkhCOWBzI06dPY+rUqfDx8YFEIsHhw4dNHmeMYf369dBoNHB0dERERAQKCwut1V9CejSLZ1lramoQFBSEd955B7NmzRI8vmXLFuzYsQN//etfERAQgA8++ABarRbXrl2Dg4ODVTrdFZwPCGcMnQ+Yv76bBc+1Ux0uWt88ur9wu1niN0lvCX/OgmcUsq8ziNadr9wV1DxPHxRt+4Jc5ObrW+I3ahNxFgdy8uTJmDx5suhjjDHExcXhT3/6E6ZPnw4A+Nvf/gaVSoXDhw9j7lzhpQRCyGNWPYcsKipCaWkpIiIijDWlUonQ0FBkZ2eLrlNfX4+qqiqThZDeyqqBLC0tBQCoVKafMaNSqYyPPSk2NhZKpdK4+Pr6WrNLhHQrNp9lXbt2LSorK41LcXGxrbtEiM1Y9a1zarUaAKDX66HRaIx1vV6PESNGiK6jUCigUCis2Y1up6lUL1p3PiisN7eyDecDP1uxR4/pfy/89L1hcvE/m89+GSyo9U/4X9G2TR3rVo9l1REyICAAarUaGRmP36hYVVWFCxcuICxM+B9LCDFl8Qj54MED/Pvfj6fei4qKcPnyZXh4eMDPzw8rVqzA5s2bMXDgQONlDx8fH8yYMcOa/SakR7I4kJcuXcL48Y8/WmLVqlUAgOjoaCQmJmL16tWoqanBokWLUFFRgTFjxuDYsWPd6hokIbZicSDDw8PBmPB2oxYSiQSbNm3Cpk2bOtQxQnojm8+yEkIeoxuUCQDA3l/8+u+udbsENZnETrRt6vYIQc3zrvgbQog4GiEJ4QgFkhCOUCAJ4QgFkhCO0KQOAQDcWPmMaH2kQvjpeVcb6kTbelwT/0oEYj4aIQnhCAWSEI5QIAnhCAWSEI5QIAnhCM2y9kL1U0YKarmvbWultfDm8feXLxdt6fjdxY50i4BGSEK4QoEkhCMUSEI4QoEkhCM0qdML3ZksfB12kYh/8t+bRa8Iak7HfhBt2/rnSBBz0QhJCEcokIRwhAJJCEcokIRwhAJJCEdolrUHk7q6itZ/N/asoFZleCjatuzjAYKaov77jnWMtIpGSEI4QoEkhCMUSEI4QoEkhCM0qdODFX44TLSe3ne3oDa9cLZoW8VRmsDpSjRCEsIRCiQhHKFAEsIRCiQhHKFAEsIRmmXtISrfHiWoXXljh2jbm02NgtqD/+wn2laBux3rGLEIjZCEcIQCSQhHKJCEcIQCSQhHaFKnm7F/xke0vuKD/YKaQiL+3zv3h98Jal7f0FvkeEAjJCEcoUASwhEKJCEcoUASwhGLAhkbG4uRI0fC1dUV3t7emDFjBgoKCkzaPHz4EDqdDp6ennBxccHs2bOh1+ut2mlCeiqLZlmzsrKg0+kwcuRINDU1Yd26dYiMjMS1a9fg7OwMAFi5ciW+/vprpKamQqlUIiYmBrNmzcK5c+c6ZQd6Mom98L8nKP1H0bavu/wsqCVVe4u2VX0gfB02WNg30jksCuSxY8dMfk5MTIS3tzdycnIwbtw4VFZW4osvvkBycjImTJgAAEhISMDQoUNx/vx5jBolfL8lIeSxDp1DVlZWAgA8PDwAADk5OWhsbERERISxzZAhQ+Dn54fs7GzRbdTX16OqqspkIaS3ancgDQYDVqxYgdGjRyMwMBAAUFpaCrlcDnd3d5O2KpUKpaWlotuJjY2FUqk0Lr6+vu3tEiHdXrsDqdPpkJ+fj5SUlA51YO3ataisrDQuxcXFHdoeId1Zu946FxMTg/T0dJw+fRr9+j2+j06tVqOhoQEVFRUmo6Rer4darRbdlkKhgEIh/mWhvV7QYEHpI++vzF49/uPXRevuP4ifPhDbs2iEZIwhJiYGhw4dwsmTJxEQEGDyeHBwMGQyGTIyMoy1goIC3LlzB2FhYdbpMSE9mEUjpE6nQ3JyMtLS0uDq6mo8L1QqlXB0dIRSqcTChQuxatUqeHh4wM3NDUuXLkVYWBjNsBJiBosCuWfPHgBAeHi4ST0hIQHz588HAGzbtg1SqRSzZ89GfX09tFotdu8WfjAvIUTIokAyxp7axsHBAfHx8YiPj293pwjprei9rIRwhG5Q5oDd84NE64tS0szexvNf6gS1/l+db3efiG3QCEkIRyiQhHCEAkkIRyiQhHCEJnU4cGNJH9H6VCfz73zpd6pBWDTjMhXhC42QhHCEAkkIRyiQhHCEAkkIRyiQhHCEZlm72MOpIYJaxtT/aqW1U+d2hnCHRkhCOEKBJIQjFEhCOEKBJIQjNKnTxUpG2wlqfvbmT9609vUAsirhW+fojXPdD42QhHCEAkkIRyiQhHCEAkkIRyiQhHCEZlk5Fvvz84Jatra/aFt29386uTekK9AISQhHKJCEcIQCSQhHKJCEcIQmdbrYgDXCL0uNWvOiBVsQ/2p40jPQCEkIRyiQhHCEAkkIRyiQhHCEu0mdlm9pbkIj3dBHOk0TGgGY963gXYm7QFZXVwMAzuKojXtCeoPq6moolUpbd8NIwjh7iTAYDCgpKYGrqyuqq6vh6+uL4uJiuLm52bprVlVVVUX7ZkOMMVRXV8PHxwdSKT9nbtyNkFKpFP369QMASCQSAICbmxu3/7EdRftmOzyNjC34eWkghFAgCeEJ14FUKBTYsGEDFAqFrbtidbRvRAx3kzqE9GZcj5CE9DYUSEI4QoEkhCMUSEI4wnUg4+Pj0b9/fzg4OCA0NBQXL160dZcsdvr0aUydOhU+Pj6QSCQ4fPiwyeOMMaxfvx4ajQaOjo6IiIhAYWGhbTprgdjYWIwcORKurq7w9vbGjBkzUFBQYNLm4cOH0Ol08PT0hIuLC2bPng29Xm+jHncP3AZy//79WLVqFTZs2IDc3FwEBQVBq9WirKzM1l2zSE1NDYKCghAfHy/6+JYtW7Bjxw78+c9/xoULF+Ds7AytVouHDx92cU8tk5WVBZ1Oh/Pnz+P48eNobGxEZGQkampqjG1WrlyJI0eOIDU1FVlZWSgpKcGsWbNs2OtugHEqJCSE6XQ648/Nzc3Mx8eHxcbG2rBXHQOAHTp0yPizwWBgarWaffrpp8ZaRUUFUygUbN++fTboYfuVlZUxACwrK4sx9mg/ZDIZS01NNba5fv06A8Cys7Nt1U3ucTlCNjQ0ICcnBxEREcaaVCpFREQEsrOFn0nTXRUVFaG0tNRkP5VKJUJDQ7vdflZWVgIAPDw8AAA5OTlobGw02bchQ4bAz8+v2+1bV+IykPfu3UNzczNUKpVJXaVSobS053zIU8u+dPf9NBgMWLFiBUaPHo3AwEAAj/ZNLpfD3d3dpG1327euxt3dHqT70el0yM/Px9mzZ23dlW6PyxGyb9++sLOzE8zI6fV6qNVqG/XK+lr2pTvvZ0xMDNLT05GZmWm8bQ54tG8NDQ2oqKgwad+d9s0WuAykXC5HcHAwMjIyjDWDwYCMjAyEhYXZsGfWFRAQALVabbKfVVVVuHDhAvf7yRhDTEwMDh06hJMnTyIgIMDk8eDgYMhkMpN9KygowJ07d7jfN5uy9axSa1JSUphCoWCJiYns2rVrbNGiRczd3Z2VlpbaumsWqa6uZnl5eSwvL48BYFu3bmV5eXns9u3bjDHGPvnkE+bu7s7S0tLYlStX2PTp01lAQACrq6uzcc/b9v777zOlUslOnTrF7t69a1xqa2uNbd577z3m5+fHTp48yS5dusTCwsJYWFiYDXvNP24DyRhjO3fuZH5+fkwul7OQkBB2/vx5W3fJYpmZmQyPPq7LZImOjmaMPbr08cEHHzCVSsUUCgWbOHEiKygosG2nzSC2TwBYQkKCsU1dXR1bsmQJ69OnD3NycmIzZ85kd+/etV2nuwG6/YoQjnB5DklIb0WBJIQjFEhCOEKBJIQjFEhCOEKBJIQjFEhCOEKBJIQjFEhCOEKBJIQjFEhCOEKBJIQj/wdXjCvjyw2SbwAAAABJRU5ErkJggg==", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAOQAAADcCAYAAABgfg1NAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjYuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/av/WaAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAbl0lEQVR4nO3de1gTZ74H8G+4JCCEgAoJVG7WUisWaKlavF8QHlFa1K26dbfg9qy0BizSXVfcVqt1i7ejtIi221NxdbW62PVaF4uIWileivS03pBaULaYqF0JFyVc8p4/PKQdM0gCwbzo7/M8eR7zyzsz7xC/mZl3ZhIJY4yBEMIFO1t3gBDyMwokIRyhQBLCEQokIRyhQBLCEQokIRyhQBLCEQokIRyhQBLCkW4XyICAACQkJBifHzlyBBKJBEeOHLHaMiQSCd555x2rzY+ICwgIwKRJk6w6z+7+3lkUyE2bNkEikRgfTk5OCAoKQlJSErRabVf1sUscOHCgW79x7Tl9+jSSkpIQHBwMFxcX+Pn5Ydq0abh06VKH51lRUQGJRILVq1dbsafd208//YRVq1Zh5MiR8PT0hLu7O55//nns2LGjQ/Nz6MhES5cuRWBgIBoaGnD8+HFs2LABBw4cwNmzZ9GjR48OdaSjRo4ciTt37kAqlVo03YEDB5CVlSUayjt37sDBoUN/Gm6sWLEChYWFeOmllxASEgKNRoN169bh2WefxYkTJzBw4EBbd/GhUFRUhD//+c+IiYnBW2+9BQcHB3z22WeYMWMGzp8/jyVLllg2Q2aB7OxsBoCdPn1aUE9NTWUA2LZt29qctq6uzpJFtcnf35/Fx8d3ej5qtZpZuPrdSmFhIdPr9YLapUuXmEwmYzNnzuzQPMvLyxkAtmrVKmt0kfn7+7OJEydaZV6tALDFixdbdZ7388MPP7CKigpBzWAwsLFjxzKZTGbx/3urHEOOHTsWAFBeXg4ASEhIgKurKy5fvoyYmBjI5XLMnDkTAGAwGJCRkYHg4GA4OTlBqVQiMTERt27duveDAsuWLUOfPn3Qo0cPjBkzBufOnTNZdlvHkCdPnkRMTAw8PDzg4uKCkJAQvP/++8b+ZWVlAYBgF7yV2HFISUkJJkyYADc3N7i6umLcuHE4ceKEoE3rLn1hYSFSU1Ph6ekJFxcXTJ48GTdu3LDwr9o5Q4cONdlreOKJJxAcHIwLFy506bKzs7MxduxYeHl5QSaTYcCAAdiwYUOb7b/44guEhYXByckJAwYMwD//+U+TNtXV1UhJSYGvry9kMhn69euHFStWwGAwdOWqtCswMBD+/v6CmkQiQVxcHPR6PX744QeL5meV/bLLly8DAHr16mWsNTc3Izo6GsOHD8fq1auNu7KJiYnYtGkTZs2ahblz56K8vBzr1q1DSUkJCgsL4ejoCABYtGgRli1bhpiYGMTExODMmTOIiopCY2Nju/3Jy8vDpEmT4O3tjTfeeAMqlQoXLlzA/v378cYbbyAxMRFVVVXIy8vDli1b2p3fuXPnMGLECLi5uWH+/PlwdHTERx99hNGjR+Po0aMYMmSIoH1ycjI8PDywePFiVFRUICMjA0lJSe0eV+j1etTW1rbbHwDo3bu3We1+iTEGrVaL4OBgi6e1xIYNGxAcHIwXXngBDg4O2LdvH+bMmQODwQC1Wi1oW1ZWhunTp+O1115DfHw8srOz8dJLLyE3Nxfjx48HANy+fRujRo3Cjz/+iMTERPj5+eGrr75CWloarl27hoyMDIv7ePPmTbPayeVyyGQyi+ev0WgAdOB9smRz2rrLeujQIXbjxg1WWVnJtm/fznr16sWcnZ3Zv//9b8YYY/Hx8QwAW7BggWD6L7/8kgFgW7duFdRzc3MF9evXrzOpVMomTpzIDAaDsd3ChQsZAMEua0FBAQPACgoKGGOMNTc3s8DAQObv789u3bolWM4v53W/XVbcs9sTFxfHpFIpu3z5srFWVVXF5HI5GzlypMnfJzIyUrCsefPmMXt7e1ZdXS26vHunN+fREVu2bGEA2CeffNKh6c3dZb19+7ZJLTo6mvXt21dQ8/f3ZwDYZ599ZqzpdDrm7e3NnnnmGWPt3XffZS4uLuzSpUuC6RcsWMDs7e3Z1atXjbV737u2mPt3zs7Obnde9/rpp5+Yl5cXGzFihMXTdmgLGRkZKXju7++PrVu34rHHHhPUX3/9dcHznJwcKBQKjB8/XvAJFR4eDldXVxQUFODll1/GoUOH0NjYiOTkZMGuZEpKCt5777379q2kpATl5eVYu3Yt3N3dBa/9cl7mamlpwRdffIG4uDj07dvXWPf29sbLL7+Mjz/+GDU1NXBzczO+Nnv2bMGyRowYgbVr1+LKlSsICQlpc1nR0dHIy8uzuI/muHjxItRqNSIiIhAfH98ly2jl7Oxs/LdOp0NTUxNGjRqFgwcPQqfTQaFQGF/38fHB5MmTjc/d3NzwyiuvYMWKFdBoNFCpVMjJycGIESPg4eEh+H8TGRmJ5cuX49ixY8ZDInOZ+3e2dG/CYDBg5syZqK6uRmZmpkXTAh3cZc3KykJQUBAcHBygVCrx5JNPws5OeDjq4OCAPn36CGplZWXQ6XTw8vISne/169cBAFeuXAFw95jnlzw9PeHh4XHfvrXuPltrFPHGjRu4ffs2nnzySZPXnnrqKRgMBlRWVgreOD8/P0G71j7fe5x8L29vb3h7e1uh10IajQYTJ06EQqHAzp07YW9vb/Vl/FJhYSEWL16MoqIi3L59W/DavYHs16+fyQdlUFAQgLunWVQqFcrKyvDtt9/C09NTdHmt/28sce9GxVqSk5ORm5uLzZs3IzQ01OLpOxTIwYMH47nnnrtvG5lMZhJSg8EALy8vbN26VXSatv7g3U1b/+FZO9+WcufOHeh0OrOWoVKpzGqn0+kwYcIEVFdX48svv4SPj49Z03XU5cuXMW7cOPTv3x9r1qyBr68vpFIpDhw4gLVr13ZoEMZgMGD8+PGYP3++6OutAbZE6zFeexQKhWCLfz9LlizB+vXrsXz5cvz2t7+1uE+AlQZ1zPX444/j0KFDGDZs2H1XsnXUqqysTLCbeOPGjXa3Mo8//jgA4OzZs/f9FDR399XT0xM9evRAaWmpyWsXL16EnZ0dfH19zZpXe3bs2IFZs2aZ1ba9cANAQ0MDYmNjcenSJRw6dAgDBgzobBfbtW/fPuj1euzdu1ewp1BQUCDa/vvvvwdjTPB+tF68EBAQAODue1pXV2fVrZq5eyLZ2dmCK8Pa0npOOyUlBX/605863K8HGshp06Zh/fr1ePfdd02OBZubm1FXVwd3d3dERkbC0dERmZmZiIqKMr5Z5oymPfvsswgMDERGRgYSEhIEx5G/fONdXFwA3B1Ov/dY85fs7e0RFRWFPXv2oKKiwvifRKvVYtu2bRg+fLjg+LEzrHkM2dLSgunTp6OoqAh79uxBRESEVebbnta9g19+YOh0OmRnZ4u2r6qqwq5duzBlyhQAQE1NDTZv3oywsDDjXsC0adPwzjvv4ODBg4iOjhZMX11dDVdXV4sv5LDmMeSOHTswd+5czJw5E2vWrLGoH/d6oIEcNWoUEhMTkZ6ejm+++QZRUVFwdHREWVkZcnJy8P777+NXv/oVPD098Yc//AHp6emYNGkSYmJiUFJSgn/961/tDiPb2dlhw4YNiI2NRVhYGGbNmgVvb29cvHgR586dw8GDBwHcHUgCgLlz5yI6Ohr29vaYMWOG6DyXLVuGvLw8DB8+HHPmzIGDgwM++ugj6PV6rFy50mp/H2seQ7755pvYu3cvYmNj8Z///Ad///vfBa//5je/Mf679TSUuVuD/Px8NDQ0mNTj4uIQFRUFqVSK2NhYJCYmoq6uDh9//DG8vLxw7do1k2mCgoLw6quv4vTp01Aqldi4cSO0Wq0gwH/84x+xd+9eTJo0CQkJCQgPD0d9fT2+++477Ny5ExUVFRafXrDW1vbUqVN45ZVX0KtXL4wbN87kcGzo0KGCvbx2WTIk29aVOveKj49nLi4ubb7+17/+lYWHhzNnZ2cml8vZ008/zebPn8+qqqqMbVpaWtiSJUuYt7c3c3Z2ZqNHj2Znz541uVLn3tMerY4fP87Gjx/P5HI5c3FxYSEhISwzM9P4enNzM0tOTmaenp5MIpEITiVAZOj8zJkzLDo6mrm6urIePXqwMWPGsK+++sqsv09bfexKo0aNMvu0SWZmJgPAcnNz7zvP1tMebT22bNnCGGNs7969LCQkhDk5ObGAgAC2YsUKtnHjRgaAlZeXG+fXeqXOwYMHWUhICJPJZKx///4sJyfHZNm1tbUsLS2N9evXj0mlUta7d282dOhQtnr1atbY2GhsJ/bedaX2TlVZetpEwhh9L+ujbtq0aaioqMCpU6ds3ZVHXve+gpp0GmMMR44cMdmlJbZBW0hCONLtblAm5GFGgSSEIxRIQjhCgSSEI102ypqVlYVVq1ZBo9EgNDQUmZmZGDx4cLvTGQwGVFVVQS6Xd+juDELMwRhDbW0tfHx8TK65tilrnyhljLHt27czqVTKNm7cyM6dO8d+//vfM3d3d6bVatudtrKy0ux71ehBj84+KisruyICHdYlpz2GDBmCQYMGYd26dQDubvV8fX2RnJyMBQsW3HdanU4Hd3d3DEcMHOBo7a4RAgBoRhOO4wCqq6sFt4PZmtV3WRsbG1FcXIy0tDRjzc7ODpGRkSgqKjJpr9frodfrjc9bv8LCAY5wkFAgSRf5/80Qb4dFVt95vnnzJlpaWqBUKgV1pVIpeg9aeno6FAqF8WGtW5kI6Y5sfjSblpYGnU5nfFRWVtq6S4TYjNV3WXv37g17e3uTbzLXarWid7nLZLIOfasXIQ8jq28hpVIpwsPDkZ+fb6wZDAbk5+c/sJtkCemuuuQ8ZGpqKuLj4/Hcc89h8ODByMjIQH19vdlfT0HIo6pLAjl9+nTcuHEDixYtgkajQVhYGHJzc00GegghQtzdflVTUwOFQoHReJFOe5Au08yacAR7oNPprPadSNZg81FWQsjPKJCEcIQCSQhHKJCEcIQCSQhHKJCEcIS+BvIBq1hmerVSi5P4mSfPYNNfXS4K/czsZT1+WPxCDPkp099VUX7wldnzJV2HtpCEcIQCSQhHKJCEcIQCSQhHKJCEcIRGWbvIrc+fEK2fDVvXqfk2WXArwMUx/yNa3/qc6W9Q/iNvlGjblgtl5i+QdBptIQnhCAWSEI5QIAnhCAWSEI7QoI4ViA3gFIZt7/R8P6zua1JbUzRetG2Av+lldl8M+Kdo25nyaya1vyT0Fm3b9080qPMg0RaSEI5QIAnhCAWSEI5QIAnhCAWSEI7QKKsFmseFi9YPh2aJVMW/UzbjVpBJrWD6c+ILrLpuUgq69bVoUzsnJ5PaeyefFm27sPd3JrVmj2bxPpAHiraQhHCEAkkIRyiQhHCEAkkIR2hQxwJ1j0lF63Yin2tigzcAcOQF04GWlh9KO9cxAN8vecaktq3nf7fR2vQHcvvk0mczD+hdIIQjFEhCOEKBJIQjFEhCOEKBJIQjNMpqAffNRaL1X339G5Oa5FaNaNvmaxXW7JLRf8UcMqm52pmOphK+0RaSEI5QIAnhCAWSEI5QIAnhCA3qWEHL+UsPbFkVfzH9wVcAeNV9tUjV9B5JAHjz2vMmNfmhC6JtW8zuGbEG2kISwhEKJCEcoUASwhEKJCEcsTiQx44dQ2xsLHx8fCCRSLB7927B64wxLFq0CN7e3nB2dkZkZCTKyujr6Akxh8WjrPX19QgNDcXvfvc7TJkyxeT1lStX4oMPPsDf/vY3BAYG4u2330Z0dDTOnz8PJ5FvRiNtq/6t6Yhq4Stio6mAws70b1uktxdt+80y05uZnWtOWdg70hUsDuSECRMwYcIE0dcYY8jIyMBbb72FF198EQCwefNmKJVK7N69GzNmzOhcbwl5yFn1GLK8vBwajQaRkZHGmkKhwJAhQ1BUJH5htl6vR01NjeBByKPKqoHUaDQAAKVSKagrlUrja/dKT0+HQqEwPnx9fa3ZJUK6FZuPsqalpUGn0xkflZWVtu4SITZj1UvnVCoVAECr1cLb29tY12q1CAsLE51GJpNBJqP79sTcfJaZ1MQGb9oSf+S/ROtBu2kAh1dW3UIGBgZCpVIhPz/fWKupqcHJkycRESF+DSYh5GcWbyHr6urw/fffG5+Xl5fjm2++Qc+ePeHn54eUlBQsW7YMTzzxhPG0h4+PD+Li4qzZb0IeShYH8uuvv8aYMWOMz1NTUwEA8fHx2LRpE+bPn4/6+nrMnj0b1dXVGD58OHJzc+kcJCFmsDiQo0ePBmOmxzatJBIJli5diqVLl3aqY4Q8imw+ykoI+RndoMyBxjx/0XpRf7Hf5hDf9Q8tijepPfXmZdG2dNMxv2gLSQhHKJCEcIQCSQhHKJCEcIQGdR4wh74BJrV3++WItvUQuUyuWC8+X/93TYdqWm7dsqhvxPZoC0kIRyiQhHCEAkkIRyiQhHCEAkkIR2iU9QF7/B8/mtSekZr/ufjr/NdE60H/e7rDfSL8oC0kIRyhQBLCEQokIRyhQBLCERrU6SK34sW/1GuJUuweR/Fv3YuviDSpPTX/e5GWdI/jw4K2kIRwhAJJCEcokIRwhAJJCEcokIRwhEZZrcDhMR+T2oi5J0XbutqZ/zsmRef7mdSCbtElcg8z2kISwhEKJCEcoUASwhEKJCEcoUEdK7iw0PRn2Her9pk9/ZjvXhKti10mR5fIPdxoC0kIRyiQhHCEAkkIRyiQhHCEAkkIR2iU1QqKX1grUjX/EjnFHINovZl+m+ORQ1tIQjhCgSSEIxRIQjhCgSSEIzSow4EmpUK07tj4WJcsr+XGTZMa04v/EqxEZjo4Ze/Z2/xlebqL1svelJo9DzGsRSJa758scrlhTU2nlvUg0RaSEI5QIAnhCAWSEI5QIAnhiEWBTE9Px6BBgyCXy+Hl5YW4uDiUlpYK2jQ0NECtVqNXr15wdXXF1KlTodVqrdppQh5WFo2yHj16FGq1GoMGDUJzczMWLlyIqKgonD9/Hi4uLgCAefPm4fPPP0dOTg4UCgWSkpIwZcoUFBYWdskKPAw+37nxgS5vaMmvTWo3tW6ibT08a01qJ8O3Wb1P1jLgrSSTWt/5RTboScdYFMjc3FzB802bNsHLywvFxcUYOXIkdDodPvnkE2zbtg1jx44FAGRnZ+Opp57CiRMn8Pzzz1uv54Q8hDp1DKnT6QAAPXv2BAAUFxejqakJkZE//2pT//794efnh6Ii8U8pvV6PmpoawYOQR1WHA2kwGJCSkoJhw4Zh4MCBAACNRgOpVAp3d3dBW6VSCY1GIzqf9PR0KBQK48PX1/T7aQh5VHQ4kGq1GmfPnsX27ds71YG0tDTodDrjo7KyslPzI6Q769Clc0lJSdi/fz+OHTuGPn36GOsqlQqNjY2orq4WbCW1Wi1UKpXovGQyGWQil2d1Jy+en2lSyx+40wY9Mc9Xz3zaJfO9zRpNak1M/F5PMTHfJojWdd+Yf6neY8ebzW7LI4u2kIwxJCUlYdeuXTh8+DACAwMFr4eHh8PR0RH5+fnGWmlpKa5evYqICPFfFCaE/MyiLaRarca2bduwZ88eyOVy43GhQqGAs7MzFAoFXn31VaSmpqJnz55wc3NDcnIyIiIiaISVEDNYFMgNGzYAAEaPHi2oZ2dnIyEhAQCwdu1a2NnZYerUqdDr9YiOjsb69eut0llCHnYWBZIx1m4bJycnZGVlISsrq8OdIuRRRdeyEsIRukHZCpyjy01qwe+ZXsIFAKyTf3F5//+I1jt7OVvwl7NE6+yqi9nz6LuzzrR46juzp/dAmUX1hxFtIQnhCAWSEI5QIAnhCAWSEI7QoE4XCVz4YO/Bm4TwTk0fiG+t1BPSGbSFJIQjFEhCOEKBJIQjFEhCOEKBJIQjFEhCOEKBJIQjFEhCOEKBJIQjFEhCOEKBJIQjFEhCOEKBJIQjFEhCOEKBJIQjFEhCOEKBJIQjFEhCOEKBJIQjFEhCOEKBJIQjFEhCOEKBJIQjFEhCOEKBJIQjFEhCOMLdTwm0/kpzM5qA9n+wmZAOaUYTAPN+FfxB4i6QtbW1AIDjOGDjnpBHQW1tLRQKha27YSRhnH1EGAwGVFVVQS6Xo7a2Fr6+vqisrISbm5utu2ZVNTU1tG42xBhDbW0tfHx8YGfHz5Ebd1tIOzs79OnTBwAgkUgAAG5ubty+sZ1F62Y7PG0ZW/Hz0UAIoUASwhOuAymTybB48WLIZDJbd8XqaN2IGO4GdQh5lHG9hSTkUUOBJIQjFEhCOEKBJIQjXAcyKysLAQEBcHJywpAhQ3Dq1Clbd8lix44dQ2xsLHx8fCCRSLB7927B64wxLFq0CN7e3nB2dkZkZCTKysps01kLpKenY9CgQZDL5fDy8kJcXBxKS0sFbRoaGqBWq9GrVy+4urpi6tSp0Gq1Nupx98BtIHfs2IHU1FQsXrwYZ86cQWhoKKKjo3H9+nVbd80i9fX1CA0NRVZWlujrK1euxAcffIAPP/wQJ0+ehIuLC6Kjo9HQ0PCAe2qZo0ePQq1W48SJE8jLy0NTUxOioqJQX19vbDNv3jzs27cPOTk5OHr0KKqqqjBlyhQb9robYJwaPHgwU6vVxuctLS3Mx8eHpaen27BXnQOA7dq1y/jcYDAwlUrFVq1aZaxVV1czmUzGPv30Uxv0sOOuX7/OALCjR48yxu6uh6OjI8vJyTG2uXDhAgPAioqKbNVN7nG5hWxsbERxcTEiIyONNTs7O0RGRqKoqMiGPbOu8vJyaDQawXoqFAoMGTKk262nTqcDAPTs2RMAUFxcjKamJsG69e/fH35+ft1u3R4kLgN58+ZNtLS0QKlUCupKpRIajcZGvbK+1nXp7utpMBiQkpKCYcOGYeDAgQDurptUKoW7u7ugbXdbtweNu7s9SPejVqtx9uxZHD9+3NZd6fa43EL27t0b9vb2JiNyWq0WKpXKRr2yvtZ16c7rmZSUhP3796OgoMB42xxwd90aGxtRXV0taN+d1s0WuAykVCpFeHg48vPzjTWDwYD8/HxERETYsGfWFRgYCJVKJVjPmpoanDx5kvv1ZIwhKSkJu3btwuHDhxEYGCh4PTw8HI6OjoJ1Ky0txdWrV7lfN5uy9ahSW7Zv385kMhnbtGkTO3/+PJs9ezZzd3dnGo3G1l2zSG1tLSspKWElJSUMAFuzZg0rKSlhV65cYYwxtnz5cubu7s727NnDvv32W/biiy+ywMBAdufOHRv3/P5ef/11plAo2JEjR9i1a9eMj9u3bxvbvPbaa8zPz48dPnyYff311ywiIoJFRETYsNf84zaQjDGWmZnJ/Pz8mFQqZYMHD2YnTpywdZcsVlBQwHD367oEj/j4eMbY3VMfb7/9NlMqlUwmk7Fx48ax0tJS23baDGLrBIBlZ2cb29y5c4fNmTOHeXh4sB49erDJkyeza9eu2a7T3QDdfkUIR7g8hiTkUUWBJIQjFEhCOEKBJIQjFEhCOEKBJIQjFEhCOEKBJIQjFEhCOEKBJIQjFEhCOEKBJIQj/wceQqurc2vaTQAAAABJRU5ErkJggg==", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAOQAAADcCAYAAABgfg1NAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjYuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/av/WaAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAZJklEQVR4nO3de1SUdf4H8PdwmeE+CMLMkICUt9SQXUJjvesIq2mh7lrb7ll026QESz27/qSzSWueyMtRSonaTsJmmh5qvWaYIkqamiKdVi0iDyqGM+r+YkDkPt/fH/6YbZxRGRicL/h+nfOc43yf7zzzeWZ8832e7zMXhRBCgIik4ObqAojovxhIIokwkEQSYSCJJMJAEkmEgSSSCANJJBEGkkgiDCSRRLpdIPv27YvZs2dbbh88eBAKhQIHDx502mMoFAq8+uqrTtse2de3b19MnTrVqdvs7q+dQ4HMy8uDQqGwLF5eXhgwYADS0tJgNBq7qsYusWfPnm79wt3N9evXkZGRgV//+tcICgqCQqFAXl5ep7Z5/vx5KBQKrF692jlF9hBbt27FH/7wB/Tv3x8KhQLjxo3r8LY8OnKnZcuWISoqCg0NDTh8+DBycnKwZ88enD59Gj4+Ph0upiPGjBmD+vp6KJVKh+63Z88eZGdn2w1lfX09PDw69NRI49q1a1i2bBkiIiIwbNgwpx5BkLWcnByUlJQgLi4O//nPfzq1rQ79r5s8eTIeffRRAMCf//xnBAcHY82aNdixYwd+97vf2b1PXV0dfH19O17pbbi5ucHLy8up23T29lxBp9Ph8uXL0Gq1OHnyJOLi4lxdUo+1ceNGPPDAA3Bzc8PQoUM7tS2nnENOmDABAFBRUQEAmD17Nvz8/HDu3DlMmTIF/v7++P3vfw8AMJvNyMrKwpAhQ+Dl5QWNRoOUlBT89NNPVtsUQmD58uXo06cPfHx8MH78eJw5c8bmsW93Dnn8+HFMmTIFvXr1gq+vL6Kjo/Hmm29a6svOzgYAq0PwNvbOQ0pLSzF58mQEBATAz88PEydOxLFjx6z6tB3SHzlyBIsWLUJISAh8fX0xffp0XL161cFntXNUKhW0Wu09fcw2ubm5mDBhAkJDQ6FSqTB48GDk5OTctv/nn3+OmJgYeHl5YfDgwfjXv/5l06e6uhoLFixAeHg4VCoV+vXrhxUrVsBsNnflrrRLeHg43NycMx3jlOOyc+fOAQCCg4MtbS0tLUhMTMSoUaOwevVqy6FsSkoK8vLyMGfOHLz44ouoqKjA+vXrUVpaiiNHjsDT0xMAsHTpUixfvhxTpkzBlClTcOrUKSQkJKCpqemu9ezbtw9Tp06FTqfDSy+9BK1Wi2+//Ra7d+/GSy+9hJSUFFRVVWHfvn3YuHHjXbd35swZjB49GgEBAVi8eDE8PT3x7rvvYty4cTh06BBGjBhh1X/+/Pno1asXMjIycP78eWRlZSEtLQ1bt2694+M0Njaitrb2rvUAQO/evdvVzxVycnIwZMgQPPHEE/Dw8MCuXbswb948mM1mpKamWvUtLy/HU089heeffx7JycnIzc3Fb3/7WxQUFGDSpEkAgBs3bmDs2LH48ccfkZKSgoiICHz55ZdIT0/H5cuXkZWV5XCN165da1c/f39/qFQqh7ffYcIBubm5AoDYv3+/uHr1qqisrBRbtmwRwcHBwtvbW1y6dEkIIURycrIAIJYsWWJ1/y+++EIAEJs2bbJqLygosGq/cuWKUCqV4vHHHxdms9nS7+WXXxYARHJysqWtqKhIABBFRUVCCCFaWlpEVFSUiIyMFD/99JPV4/x8W6mpqeJ2uw9AZGRkWG4nJSUJpVIpzp07Z2mrqqoS/v7+YsyYMTbPj16vt3qshQsXCnd3d1FdXW338W69f3sWR5w4cUIAELm5uQ7d71YVFRUCgFi1atUd+924ccOmLTExUTz44INWbZGRkQKA+OSTTyxtJpNJ6HQ68Ytf/MLS9tprrwlfX1/x/fffW91/yZIlwt3dXVy8eNHSdutrdzvtfZ4dfc6GDBkixo4d69B9fq5DI6Rer7e6HRkZiU2bNuGBBx6wan/hhResbufn50OtVmPSpElWf6FiY2Ph5+eHoqIiPPPMM9i/fz+ampowf/58q0PJBQsW4PXXX79jbaWlpaioqMDatWsRGBhote7n22qv1tZWfP7550hKSsKDDz5oadfpdHjmmWfw3nvvoaamBgEBAZZ1c+fOtXqs0aNHY+3atbhw4QKio6Nv+1iJiYnYt2+fwzXKxtvb2/Jvk8mE5uZmjB07Fnv37oXJZIJarbasDwsLw/Tp0y23AwIC8Mc//hErVqyAwWCAVqtFfn4+Ro8ejV69eln9v9Hr9XjjjTdQXFxsOSVqr/Y+z0OGDHFou53VoUBmZ2djwIAB8PDwgEajwcCBA22OoT08PNCnTx+rtvLycphMJoSGhtrd7pUrVwAAFy5cAAD079/fan1ISAh69ep1x9raDp87e3Ld5urVq7hx4wYGDhxos+7hhx+G2WxGZWWl1QsXERFh1a+t5lvPk2+l0+mg0+mcULVrHTlyBBkZGTh69Chu3Lhhte7WQPbr18/mD+WAAQMA3LzMotVqUV5ejm+++QYhISF2H6/t/40jbh1UZNGhQA4fPtwyy3o7KpXKJqRmsxmhoaHYtGmT3fvc7gnvbtzd3e22i7t8W0p9fT1MJlO7HsNVEzZ3c+7cOUycOBGDBg3CmjVrEB4eDqVSiT179mDt2rUdmoQxm82YNGkSFi9ebHd9W4AdYTAY2tVPrVZbjfhd7Z5ebHvooYewf/9+jBw58o47GRkZCeDmiPrzw8SrV6/edZR56KGHAACnT5++41/B9h6+hoSEwMfHB2VlZTbrvvvuO7i5uSE8PLxd27qbrVu3Ys6cOe3qe7dwu8quXbvQ2NiInTt3Wh0pFBUV2e3/ww8/QAhh9Xp8//33AG6+kwe4+Zpev37dqaNae49EcnNzrd4Z1tXuaSBnzZqFt99+G6+99prNuWBLSwuuX7+OwMBA6PV6eHp6Yt26dUhISLC8WO2ZTfvlL3+JqKgoZGVlYfbs2VbnkT9/4duuiVZXV9uca/6cu7s7EhISsGPHDpw/f97yn8RoNGLz5s0YNWqU1fljZ/SEc8i2o4Of/8EwmUzIzc2127+qqgrbtm3DjBkzAAA1NTX44IMPEBMTYzkKmDVrFl599VXs3bsXiYmJVvevrq6Gn5+fw2/k6FHnkB01duxYpKSkIDMzE19//TUSEhLg6emJ8vJy5Ofn480338RvfvMbhISE4C9/+QsyMzMxdepUTJkyBaWlpfjss8/uOt3v5uaGnJwcTJs2DTExMZgzZw50Oh2+++47nDlzBnv37gVwcyIJAF588UUkJibC3d0dTz/9tN1tLl++HPv27cOoUaMwb948eHh44N1330VjYyNWrlzptOfH2eeQ69evR3V1NaqqqgDcHL0uXboE4OalmbZzubbLUO0dDQoLC9HQ0GDTnpSUhISEBCiVSkybNg0pKSm4fv063nvvPYSGhuLy5cs29xkwYACeffZZnDhxAhqNBhs2bIDRaLQK8F//+lfs3LkTU6dOxezZsxEbG4u6ujr8+9//xscff4zz5887fBnImaNtcXExiouLAdw8iqurq8Py5csB3Hwn2ZgxY9q/MUemZNum5U+cOHHHfsnJycLX1/e26//xj3+I2NhY4e3tLfz9/cUjjzwiFi9eLKqqqix9Wltbxd///neh0+mEt7e3GDdunDh9+rSIjIy842WPNocPHxaTJk0S/v7+wtfXV0RHR4t169ZZ1re0tIj58+eLkJAQoVAorC4lwM7U+alTp0RiYqLw8/MTPj4+Yvz48eLLL79s1/Nzuxq7WttlBXtLRUWFpd+6desEAFFQUHDH7bVd9rjdsnHjRiGEEDt37hTR0dHCy8tL9O3bV6xYsUJs2LDB5nEjIyPF448/Lvbu3Suio6OFSqUSgwYNEvn5+TaPXVtbK9LT00W/fv2EUqkUvXv3Fr/61a/E6tWrRVNTk6Wfvdeuq2VkZNz2OXG0FoUQkp6M0D0za9YsnD9/Hl999ZWrS7nvde93UFOnCSFw8OBBfPjhh64uhQBwhCSSSLf7gDJRT8ZAEkmEgSSSCANJJJEum2XNzs7GqlWrYDAYMGzYMKxbtw7Dhw+/6/3MZjOqqqrg7+/foU9nELWHEAK1tbUICwtz2oeLncLZF0mFEGLLli1CqVSKDRs2iDNnzojnnntOBAYGCqPReNf7VlZWtvuzaly4dHaprKzsigh0WJdc9hgxYgTi4uKwfv16ADdHvfDwcMyfPx9Lliy5431NJhMCAwMxClPgAU9nl0YEAGhBMw5jD6qrq60+DuZqTj9kbWpqQklJCdLT0y1tbm5u0Ov1OHr0qE3/xsZGNDY2Wm63fYWFBzzhoWAgqYv8/zAk22mR0w+er127htbWVmg0Gqt2jUZj9zNomZmZUKvVlsVZH2Ui6o5cfjabnp4Ok8lkWSorK11dEpHLOP2QtXfv3nB3d7f5JnOj0Wj3U+4qlerefqsXkcScPkIqlUrExsaisLDQ0mY2m1FYWIj4+HhnPxxRj9Il1yEXLVqE5ORkPProoxg+fDiysrJQV1fX7q+nILpfdUkgn3rqKVy9ehVLly6FwWBATEwMCgoKbCZ6iMiadB+/qqmpgVqtxjg8ycse1GVaRDMOYgdMJpPTvhPJGVw+y0pE/8VAEkmEgSSSCANJJBEGkkgiDCSRRBhIIokwkEQSYSCJJMJAEkmEgSSSCANJJBH+2M59SBFr+yOkn+7caLfvI++k2bSFv/al02uimzhCEkmEgSSSCANJJBEGkkginNS5D12Js/2EfAta7fb1qZLqCyV6PI6QRBJhIIkkwkASSYSBJJIIA0kkEc6y3od+iradUb3U0minJxD8vu1PCFLX4QhJJBEGkkgiDCSRRBhIIolwUqcHEyNj7LZ/MXWNTdvY4vl2+/ZDqTNLorvgCEkkEQaSSCIMJJFEGEgiiTCQRBLhLGsP9r+Dve2269x9bNoe+Jg/Hy8DjpBEEmEgiSTCQBJJhIEkkggndXqwifPsf5Zxe12gTZvfwTK7fe1/Fx11FY6QRBJhIIkkwkASSYSBJJKIw4EsLi7GtGnTEBYWBoVCge3bt1utF0Jg6dKl0Ol08Pb2hl6vR3l5ubPqJerRHJ5lraurw7Bhw/CnP/0JM2bMsFm/cuVKvPXWW/jnP/+JqKgovPLKK0hMTMTZs2fh5eXllKLJlvuQgTZtr4d+ZLfv+zV9bNpaq01Or4kc53AgJ0+ejMmTJ9tdJ4RAVlYW/va3v+HJJ58EAHzwwQfQaDTYvn07nn766c5VS9TDOfUcsqKiAgaDAXq93tKmVqsxYsQIHD1q/5pYY2MjampqrBai+5VTA2kwGAAAGo3Gql2j0VjW3SozMxNqtdqyhIeHO7Mkom7F5bOs6enpMJlMlqWystLVJRG5jFPfOqfVagEARqMROp3O0m40GhETE2P3PiqVCiqVypll3Jd+nBTc7r4ltZF2WuudVwx1mFNHyKioKGi1WhQWFlraampqcPz4ccTHxzvzoYh6JIdHyOvXr+OHH36w3K6oqMDXX3+NoKAgREREYMGCBVi+fDn69+9vuewRFhaGpKQkZ9ZN1CM5HMiTJ09i/PjxltuLFi0CACQnJyMvLw+LFy9GXV0d5s6di+rqaowaNQoFBQW8BknUDg4Hcty4cRBC3Ha9QqHAsmXLsGzZsk4VRnQ/cvksKxH9Fz+g3EPUDG5ud9+v18fYtAWCP8wqA46QRBJhIIkkwkASSYSBJJIIJ3W6mcbJcXbbdySss2lbdi3Wbt+gT76xaTN3rixyEo6QRBJhIIkkwkASSYSBJJIIA0kkEc6ydjOXJth/yaKVtp+mST7/iN2+oXXfObUmch6OkEQSYSCJJMJAEkmEgSSSCCd1upmQoVfstrcK2ze/eezo1dXlkJNxhCSSCANJJBEGkkgiDCSRRBhIIolwllViHlG2v8GxemC+3b7vmWx/NSxoA79JrrvhCEkkEQaSSCIMJJFEGEgiiXBSR2LlKWE2bY/d5rdtnzs13qYtHKedXRJ1MY6QRBJhIIkkwkASSYSBJJIIA0kkEc6ySswc3tDuvvXVtt86R90PR0giiTCQRBJhIIkkwkASSYSTOhJ7e8SH7e77wGfuXVgJ3SscIYkkwkASSYSBJJIIA0kkEYcCmZmZibi4OPj7+yM0NBRJSUkoKyuz6tPQ0IDU1FQEBwfDz88PM2fOhNFodGrRRD2VQ7Oshw4dQmpqKuLi4tDS0oKXX34ZCQkJOHv2LHx9fQEACxcuxKeffor8/Hyo1WqkpaVhxowZOHLkSJfsQE/QMG243fZRXl/ZaeXEeE/m0KtbUFBgdTsvLw+hoaEoKSnBmDFjYDKZ8P7772Pz5s2YMGECACA3NxcPP/wwjh07hscee8x5lRP1QJ06hzSZTACAoKAgAEBJSQmam5uh1+stfQYNGoSIiAgcPWr/O0IbGxtRU1NjtRDdrzocSLPZjAULFmDkyJEYOnQoAMBgMECpVCIwMNCqr0ajgcFgsLudzMxMqNVqyxIebvuFv0T3iw4HMjU1FadPn8aWLVs6VUB6ejpMJpNlqays7NT2iLqzDs0QpKWlYffu3SguLkafPn0s7VqtFk1NTaiurrYaJY1GI7Rard1tqVQqqFS3+Sq1+8TFJ4TddpXC9uVZdu0Ru339dpTYtNnfKsnMoRFSCIG0tDRs27YNBw4cQFRUlNX62NhYeHp6orCw0NJWVlaGixcvIj4+3jkVE/VgDo2Qqamp2Lx5M3bs2AF/f3/LeaFarYa3tzfUajWeffZZLFq0CEFBQQgICMD8+fMRHx/PGVaidnAokDk5OQCAcePGWbXn5uZi9uzZAIC1a9fCzc0NM2fORGNjIxITE/H22287pViins6hQApx97MSLy8vZGdnIzs7u8NFEd2v+F5WIonwfVj3mHtAgE3b/4zc0+77b/5sjN32B1v446w9AUdIIokwkEQSYSCJJMJAEkmEkzr3mLmx0abt7A3bH2YFAP2Pj9q09X/9jN2+rZ0riyTBEZJIIgwkkUQYSCKJMJBEEmEgiSTCWdZ7TNiZZS2znUwFAChxwaaNs6k9G0dIIokwkEQSYSCJJMJAEkmEgSSSCANJJBEGkkgiDCSRRBhIIokwkEQSYSCJJMJAEkmEgSSSCANJJBEGkkgiDCSRRBhIIokwkEQSYSCJJMJAEklEui+5avuV5hY0A3f/wWaiDmlBM4D2/Sr4vSRdIGtrawEAh9H+HzEl6qja2lqo1WpXl2GhEJL9iTCbzaiqqoK/vz9qa2sRHh6OyspKBNj55eHurKamhvvmQkII1NbWIiwsDG5u8py5STdCurm5oU+fPgAAhUIBAAgICJD2he0s7pvryDQytpHnTwMRMZBEMpE6kCqVChkZGVCpVK4uxem4b2SPdJM6RPczqUdIovsNA0kkEQaSSCIMJJFEpA5kdnY2+vbtCy8vL4wYMQJfffWVq0tyWHFxMaZNm4awsDAoFAps377dar0QAkuXLoVOp4O3tzf0ej3Ky8tdU6wDMjMzERcXB39/f4SGhiIpKQllZWVWfRoaGpCamorg4GD4+flh5syZMBqNLqq4e5A2kFu3bsWiRYuQkZGBU6dOYdiwYUhMTMSVK1dcXZpD6urqMGzYMGRnZ9tdv3LlSrz11lt45513cPz4cfj6+iIxMRENDQ33uFLHHDp0CKmpqTh27Bj27duH5uZmJCQkoK6uztJn4cKF2LVrF/Lz83Ho0CFUVVVhxowZLqy6GxCSGj58uEhNTbXcbm1tFWFhYSIzM9OFVXUOALFt2zbLbbPZLLRarVi1apWlrbq6WqhUKvHRRx+5oMKOu3LligAgDh06JIS4uR+enp4iPz/f0ufbb78VAMTRo0ddVab0pBwhm5qaUFJSAr1eb2lzc3ODXq/H0aNHXViZc1VUVMBgMFjtp1qtxogRI7rdfppMJgBAUFAQAKCkpATNzc1W+zZo0CBERER0u327l6QM5LVr19Da2gqNRmPVrtFoYDAYXFSV87XtS3ffT7PZjAULFmDkyJEYOnQogJv7plQqERgYaNW3u+3bvSbdpz2o+0lNTcXp06dx+PBhV5fS7Uk5Qvbu3Rvu7u42M3JGoxFardZFVTlf27505/1MS0vD7t27UVRUZPnYHHBz35qamlBdXW3VvzvtmytIGUilUonY2FgUFhZa2sxmMwoLCxEfH+/CypwrKioKWq3Waj9rampw/Phx6fdTCIG0tDRs27YNBw4cQFRUlNX62NhYeHp6Wu1bWVkZLl68KP2+uZSrZ5VuZ8uWLUKlUom8vDxx9uxZMXfuXBEYGCgMBoOrS3NIbW2tKC0tFaWlpQKAWLNmjSgtLRUXLlwQQgjxxhtviMDAQLFjxw7xzTffiCeffFJERUWJ+vp6F1d+Zy+88IJQq9Xi4MGD4vLly5blxo0blj7PP/+8iIiIEAcOHBAnT54U8fHxIj4+3oVVy0/aQAohxLp160RERIRQKpVi+PDh4tixY64uyWFFRUUCN7+uy2pJTk4WQty89PHKK68IjUYjVCqVmDhxoigrK3Nt0e1gb58AiNzcXEuf+vp6MW/ePNGrVy/h4+Mjpk+fLi5fvuy6orsBfvyKSCJSnkMS3a8YSCKJMJBEEmEgiSTCQBJJhIEkkggDSSQRBpJIIgwkkUQYSCKJMJBEEmEgiSTyf+peplweLaBHAAAAAElFTkSuQmCC", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "import matplotlib.pyplot as plt\n", - "\n", - "sample_images = x_test[:3]\n", - "sample_labels = y_test[:3]\n", - "preds = predictor.predict(sample_images)[\"predictions\"].argmax(1)\n", - "for image, pred, label in zip(sample_images, preds, sample_labels):\n", - " plt.figure(figsize=(2, 2))\n", - " plt.title(f\"Prediction = {pred}, Label = {label}\")\n", - " plt.imshow(image.reshape((28, 28)))\n", - " plt.show()" - ] - }, - { - "cell_type": "markdown", - "id": "2a9e7dd5", - "metadata": {}, - "source": [ - "### Using Ray AIR for scalable batch prediction\n", - "\n", - "Although what we did above works for a small amount of test data, we can use Ray AIR's {class}`BatchPredictor ` to do scalable prediction on a much larger dataset.\n", - "\n", - "We can create a `BatchPredictor` from a checkpoint. We pass in the predictor class `TensorflowPredictor` and the `model_definition` of the checkpointed model." - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "id": "94400a99", - "metadata": {}, - "outputs": [], - "source": [ - "import ray\n", - "from ray.train.batch_predictor import BatchPredictor\n", - "from ray.train.tensorflow import TensorflowPredictor\n", - "\n", - "checkpoint: TensorflowCheckpoint = result.checkpoint\n", - "\n", - "batch_predictor = BatchPredictor.from_checkpoint(\n", - " checkpoint,\n", - " TensorflowPredictor,\n", - " # A function that returns the model architecture\n", - " model_definition=build_model,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "fd72830b", - "metadata": {}, - "source": [ - "Batch predictors work with [Ray Data](data). Here, we create a {class}`Dataset ` of images from our test set." - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "id": "58bf6e2a", - "metadata": {}, - "outputs": [], - "source": [ - "test_images_ds = ray.data.from_items(x_test)" - ] - }, - { - "cell_type": "markdown", - "id": "6ab1b08a", - "metadata": {}, - "source": [ - "Let's run {meth}`BatchPredictor.predict ` on our Dataset. This will distribute the prediction across a specified number of workers!" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c715c713", - "metadata": {}, - "outputs": [], - "source": [ - "predict_results = batch_predictor.predict(test_images_ds)" - ] - }, - { - "cell_type": "markdown", - "id": "9ccadf89", - "metadata": {}, - "source": [ - "`predict_results` is also a Dataset, and we can take a look at the predictions inside:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f423e247", - "metadata": {}, - "outputs": [], - "source": [ - "predict_results.show()" - ] - }, - { - "cell_type": "markdown", - "id": "e31bb5b6", - "metadata": {}, - "source": [ - "```\n", - "{'predictions': array([ -2.5820212, -5.676518 , 2.0977738, 3.0335152, -6.936249 ,\n", - " -2.4465032, -12.235596 , 9.626698 , -2.7761698, -1.363163 ],\n", - " dtype=float32)}\n", - "{'predictions': array([ -2.6007364, 6.3847456, 11.046391 , 4.3116417, -12.745696 ,\n", - " 1.6003606, -2.794311 , -6.191238 , 2.4532976, -9.940281 ],\n", - " dtype=float32)}\n", - "...\n", - "```\n", - "\n", - "Our model outputs logits, but we want the actual predicted labels. We can convert the logits to labels by taking the `argmax` of each model output in `predict_results` using {meth}`map_batches `. Then, we can compute the accuracy by comparing to the test set labels!" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "id": "7aea49e1", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Map_Batches: 100%|██████████| 3/3 [00:01<00:00, 1.56it/s]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Prediction Accuracy = 0.976\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "(BlockWorker pid=15393) 2022-10-17 16:44:40.531349: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/nvidia/lib:/usr/local/nvidia/lib64\n", - "(BlockWorker pid=15393) 2022-10-17 16:44:40.531385: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)\n" - ] - } - ], - "source": [ - "predicted_classes = predict_results.map_batches(\n", - " lambda batch: {\"pred\": [pred.argmax(0) for pred in batch[\"predictions\"]]}, \n", - " batch_format=\"pandas\"\n", - ")\n", - "predicted_classes_np = predicted_classes.take_batch(float(\"inf\"))[\"pred\"]\n", - "\n", - "pred_accuracy = (predicted_classes_np == y_test).astype(int).sum() / len(predicted_classes_np)\n", - "print(\"Prediction Accuracy =\", pred_accuracy)" - ] - }, - { + "attachments": {}, "cell_type": "markdown", "id": "c02abb39", "metadata": {}, @@ -829,20 +615,12 @@ "- save and retrieve model checkpoints via Ray AIR\n", "- load a model for batch prediction\n", "\n", - "In our [other examples](air-examples-ref) you can learn how to do more things with the Ray AIR API, such as **serving your model with Ray Serve** or **tune your hyperparameters with Ray Tune**. You can also learn how to **construct Ray Data** to leverage Ray AIR’s **preprocessing** API.\n", + "In our [other examples](air-examples-ref) you can learn how to do more things with the Ray AIR API, such as **serving your model with Ray Serve** or **tune your hyperparameters with Ray Tune**. You can also learn how to perform {ref}`offline batch inference ` with Ray Data.\n", "\n", "See [this table](train-framework-catalog) for a full catalog of frameworks that AIR supports out of the box.\n", "\n", "We hope this tutorial gave you a good starting point to leverage Ray AIR. If you have any questions, suggestions, or run into any problems pelase reach out on [Discuss](https://discuss.ray.io/), [GitHub](https://github.com/ray-project/ray) or the [Ray Slack](https://forms.gle/9TSdDYUgxYs8SA9e8)!" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "fdc44eb3", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { diff --git a/doc/source/ray-air/examples/gptj_deepspeed_fine_tuning.ipynb b/doc/source/ray-air/examples/gptj_deepspeed_fine_tuning.ipynb index 9a63a6c748f3..001160db8262 100644 --- a/doc/source/ray-air/examples/gptj_deepspeed_fine_tuning.ipynb +++ b/doc/source/ray-air/examples/gptj_deepspeed_fine_tuning.ipynb @@ -1017,7 +1017,7 @@ "We can use the {class}`~ray.train.huggingface.huggingface_predictor.TransformersPredictor` to generate predictions from our fine-tuned model.\n", "\n", "```{tip}\n", - "For large scale batch inference, consider configuring cloud checkpointing and then pass the cloud-backed {class}`~ray.air.checkpoint.Checkpoint` to {class}`~ray.train.batch_predictor.BatchPredictor`. More information [here](air-predictors).\n", + "For large scale batch inference, see {ref}`End-to-end: Offline Batch Inference `.\n", "```\n", "\n", "Because the {class}`~ray.train.huggingface.huggingface_predictor.TransformersPredictor` uses a 🤗 Transformers [`pipeline`](https://huggingface.co/docs/transformers/en/main_classes/pipelines) under the hood, we disable the tokenizer AIR Preprocessor we have used for training and let the `pipeline` to tokenize the data itself." diff --git a/doc/source/ray-air/examples/huggingface_text_classification.ipynb b/doc/source/ray-air/examples/huggingface_text_classification.ipynb index d9a0cb5c3e97..fbac21e8b873 100644 --- a/doc/source/ray-air/examples/huggingface_text_classification.ipynb +++ b/doc/source/ray-air/examples/huggingface_text_classification.ipynb @@ -1,6 +1,7 @@ { "cells": [ { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -8,6 +9,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "VaFMt6AIhYbK" @@ -20,11 +22,11 @@ "2. [Load the dataset](#load)\n", "3. [Preprocess the dataset with Ray AIR](#preprocess)\n", "4. [Run the training with Ray AIR](#train)\n", - "5. [Predict on test data with Ray AIR](#predict)\n", - "6. [Optionally, share the model with the community](#share)" + "5. [Optionally, share the model with the community](#share)" ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "sQbdfyWQhYbO" @@ -45,6 +47,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "pvSRaEHChYbP" @@ -54,6 +57,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "LRdL3kWBhYbQ" @@ -146,6 +150,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "oJiSdWy2hYbR" @@ -187,6 +192,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "uS6oeJELhYbS" @@ -212,6 +218,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "rEJBSTyZIrIb" @@ -221,6 +228,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "kTCFado4IrIc" @@ -243,6 +251,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "4RRkXuteIrIh" @@ -265,6 +274,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "whPRbBNbIrIl" @@ -274,6 +284,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "W7QYTpxXIrIl" @@ -306,6 +317,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "RzfPtOMoIrIu" @@ -315,6 +327,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "_TOee7nohYbW" @@ -338,6 +351,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "lnjDIuQ3IrI-" @@ -347,6 +361,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "n9qywopnIrJH" @@ -356,6 +371,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "YVx71GdAIrJH" @@ -388,6 +404,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "Vl6IidfdIrJK" @@ -397,6 +414,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "qo_0B1M2IrJM" @@ -428,6 +446,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "256fOuzjhYbY" @@ -462,6 +481,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "2C0hcmp9IrJQ" @@ -502,6 +522,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "545PP3o8IrJV" @@ -511,6 +532,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "FBiW8UpKIrJW" @@ -591,6 +613,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "CdzABDVcIrJg" @@ -633,6 +656,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "XvS136zKhYba" @@ -1007,6 +1031,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "4cnWqUWmhYba" @@ -1042,6 +1067,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -1049,6 +1075,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -1816,6 +1843,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -2032,85 +2060,7 @@ ] }, { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Predict on test data with Ray AIR " - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Tfoyu1q7hYbb" - }, - "source": [ - "You can now use the checkpoint to run prediction with `TransformersPredictor`, which wraps around [🤗 Pipelines](https://huggingface.co/docs/transformers/main_classes/pipelines). In order to distribute prediction, we use `BatchPredictor`. While this is not necessary for the very small example we are using (you could use `TransformersPredictor` directly), it will scale well to a large dataset." - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 262 - }, - "id": "UOUcBkX8IrJi", - "outputId": "4dc16812-1400-482d-8c3f-85991ce4b081" - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Map_Batches: 100%|██████████| 1/1 [00:00<00:00, 12.41it/s]\n", - "Map_Batches: 100%|██████████| 1/1 [00:00<00:00, 7.46it/s]\n", - "Map Progress (1 actors 1 pending): 100%|██████████| 1/1 [00:18<00:00, 18.46s/it]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'label': 'LABEL_1', 'score': 0.6822417974472046}\n", - "{'label': 'LABEL_1', 'score': 0.6822402477264404}\n", - "{'label': 'LABEL_1', 'score': 0.6822407841682434}\n", - "{'label': 'LABEL_1', 'score': 0.6822386980056763}\n", - "{'label': 'LABEL_1', 'score': 0.6822428107261658}\n", - "{'label': 'LABEL_1', 'score': 0.6822453737258911}\n", - "{'label': 'LABEL_1', 'score': 0.6822437047958374}\n", - "{'label': 'LABEL_1', 'score': 0.6822428703308105}\n", - "{'label': 'LABEL_1', 'score': 0.6822431683540344}\n", - "{'label': 'LABEL_1', 'score': 0.6822426915168762}\n", - "{'label': 'LABEL_1', 'score': 0.6822447776794434}\n", - "{'label': 'LABEL_1', 'score': 0.6822456121444702}\n", - "{'label': 'LABEL_1', 'score': 0.6822471022605896}\n", - "{'label': 'LABEL_1', 'score': 0.6822477579116821}\n", - "{'label': 'LABEL_1', 'score': 0.682244598865509}\n", - "{'label': 'LABEL_1', 'score': 0.6822422742843628}\n", - "{'label': 'LABEL_1', 'score': 0.6822470426559448}\n", - "{'label': 'LABEL_1', 'score': 0.6822417378425598}\n", - "{'label': 'LABEL_1', 'score': 0.6822449564933777}\n", - "{'label': 'LABEL_1', 'score': 0.682239294052124}\n" - ] - } - ], - "source": [ - "from ray.train.huggingface import TransformersPredictor\n", - "from ray.train.batch_predictor import BatchPredictor\n", - "import pandas as pd\n", - "\n", - "predictor = BatchPredictor.from_checkpoint(\n", - " checkpoint=best_result.checkpoint,\n", - " predictor_cls=TransformersPredictor,\n", - " task=\"text-classification\",\n", - " device=0 if use_gpu else -1, # -1 is CPU, otherwise device index\n", - ")\n", - "prediction = predictor.predict(ray_datasets[\"test\"].map_batches(lambda x: x[[\"sentence\"]], batch_format=\"pandas\"), num_gpus_per_worker=int(use_gpu))\n", - "prediction.show()" - ] - }, - { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -2118,6 +2068,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "mS8PId_NhYbb" @@ -2147,6 +2098,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "SybKUDryhYbb" @@ -2170,6 +2122,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "5fr6E0e8hYbb" @@ -2196,6 +2149,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "tgV2xKfFhYbc" @@ -2219,6 +2173,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "UL-Boc4dhYbc" @@ -2233,6 +2188,16 @@ "```" ] }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Next steps\n", + "\n", + "- {ref}`End-to-end: Offline Batch Inference `" + ] + }, { "cell_type": "code", "execution_count": null, diff --git a/doc/source/ray-air/examples/index.rst b/doc/source/ray-air/examples/index.rst index ede76789908a..8e41c9215e77 100644 --- a/doc/source/ray-air/examples/index.rst +++ b/doc/source/ray-air/examples/index.rst @@ -28,7 +28,6 @@ Text/NLP - :doc:`/ray-air/examples/gptj_batch_prediction`: How to use Ray AIR to do batch prediction with the Hugging Face Transformers GPT-J model. - :doc:`/ray-air/examples/gptj_serving`: How to use Ray AIR to do online serving with the Hugging Face Transformers GPT-J model. - :doc:`/ray-air/examples/dreambooth_finetuning`: How to fine-tune a DreamBooth text-to-image model with your own images. -- :doc:`/ray-air/examples/opt_deepspeed_batch_inference`: How to run batch inference on a dataset of texts with a 30B OPT model. - :doc:`/ray-air/examples/dolly_lightning_fsdp_finetuning`: How to fine-tune a dolly-v2-7b model with Ray AIR LightningTrainer and FSDP. Image/CV diff --git a/doc/source/ray-air/examples/lightgbm_example.ipynb b/doc/source/ray-air/examples/lightgbm_example.ipynb index 2798c05babfc..d0cf8ea7b3e3 100644 --- a/doc/source/ray-air/examples/lightgbm_example.ipynb +++ b/doc/source/ray-air/examples/lightgbm_example.ipynb @@ -13,6 +13,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "07d92cee", "metadata": {}, @@ -25,12 +26,23 @@ "execution_count": 1, "id": "86131abe", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.3.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.1.2\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n" + ] + } + ], "source": [ "!pip install -qU \"ray[tune]\" lightgbm_ray" ] }, { + "attachments": {}, "cell_type": "markdown", "id": "135fc884", "metadata": {}, @@ -40,15 +52,25 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 2, "id": "102ef1ac", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/balaji/Documents/GitHub/ray/.venv/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n", + "2023-07-07 14:34:14,951\tINFO util.py:159 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.\n", + "2023-07-07 14:34:15,892\tINFO util.py:159 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.\n" + ] + } + ], "source": [ "from typing import Tuple\n", "\n", "import ray\n", - "from ray.train.batch_predictor import BatchPredictor\n", "from ray.train.lightgbm import LightGBMPredictor\n", "from ray.data.preprocessors.chain import Chain\n", "from ray.data.preprocessors.encoder import Categorizer\n", @@ -60,6 +82,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "c7d102bd", "metadata": {}, @@ -69,7 +92,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 3, "id": "f1f35cd7", "metadata": {}, "outputs": [], @@ -82,6 +105,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "8f7afbce", "metadata": {}, @@ -91,7 +115,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 4, "id": "fefcbc8a", "metadata": {}, "outputs": [], @@ -127,6 +151,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "04d278ae", "metadata": {}, @@ -136,30 +161,42 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 5, "id": "3f1d0c19", "metadata": {}, "outputs": [], "source": [ + "import pandas as pd\n", + "from ray.air import Checkpoint\n", + "from ray.data import ActorPoolStrategy\n", + "\n", + "\n", + "class Predict:\n", + "\n", + " def __init__(self, checkpoint: Checkpoint):\n", + " self.predictor = LightGBMPredictor.from_checkpoint(checkpoint)\n", + "\n", + " def __call__(self, batch: pd.DataFrame) -> pd.DataFrame:\n", + " return self.predictor.predict(batch)\n", + "\n", + "\n", "def predict_lightgbm(result: Result):\n", " _, _, test_dataset = prepare_data()\n", - " batch_predictor = BatchPredictor.from_checkpoint(\n", - " result.checkpoint, LightGBMPredictor\n", - " )\n", "\n", - " predicted_labels = (\n", - " batch_predictor.predict(test_dataset)\n", - " .map_batches(lambda df: (df > 0.5).astype(int), batch_format=\"pandas\")\n", + " scores = test_dataset.map_batches(\n", + " Predict, \n", + " fn_constructor_args=[result.checkpoint], \n", + " compute=ActorPoolStrategy(), \n", + " batch_format=\"pandas\"\n", " )\n", + " \n", + " predicted_labels = scores.map_batches(lambda df: (df > 0.5).astype(int), batch_format=\"pandas\")\n", " print(f\"PREDICTED LABELS\")\n", - " predicted_labels.show()\n", - "\n", - " shap_values = batch_predictor.predict(test_dataset, pred_contrib=True)\n", - " print(f\"SHAP VALUES\")\n", - " shap_values.show()" + " predicted_labels.show()" ] }, { + "attachments": {}, "cell_type": "markdown", "id": "2bb0e5df", "metadata": {}, @@ -169,29 +206,75 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 6, "id": "8244ff3c", "metadata": {}, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2022-06-22 17:26:41,346\tWARNING read_api.py:260 -- The number of blocks in this dataset (1) limits its parallelism to 1 concurrent tasks. This is much less than the number of available CPU slots in the cluster. Use `.repartition(n)` to increase the number of dataset blocks.\n", - "Map_Batches: 100%|██████████| 1/1 [00:00<00:00, 46.26it/s]\n" - ] - }, { "data": { "text/html": [ - "== Status ==
Current time: 2022-06-22 17:26:56 (running for 00:00:14.07)
Memory usage on this node: 10.0/31.0 GiB
Using FIFO scheduling algorithm.
Resources requested: 0/8 CPUs, 0/0 GPUs, 0.0/13.32 GiB heap, 0.0/6.66 GiB objects
Result logdir: /home/ubuntu/ray_results/LightGBMTrainer_2022-06-22_17-26-41
Number of trials: 1/1 (1 TERMINATED)
\n", + "
\n", + "
\n", + "
\n", + "

Tune Status

\n", + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
Current time:2023-07-07 14:34:34
Running for: 00:00:06.06
Memory: 12.2/64.0 GiB
\n", + " \n", + "
\n", + "
\n", + "

System Info

\n", + " Using FIFO scheduling algorithm.
Logical resource usage: 4.0/10 CPUs, 0/0 GPUs\n", + "
\n", + " \n", + " \n", + "
\n", + "
\n", + "

Trial Status

\n", + " \n", "\n", - "\n", + "\n", "\n", "\n", - "\n", + "\n", "\n", - "
Trial name status loc iter total time (s) train-binary_logloss train-binary_error valid-binary_logloss
Trial name status loc iter total time (s) train-binary_logloss train-binary_error valid-binary_logloss
LightGBMTrainer_7b049_00000TERMINATED172.31.43.110:1491578 100 10.9726 0.000574522 0 0.171898
LightGBMTrainer_0c5ae_00000TERMINATED127.0.0.1:10027 101 4.5829 0.000202293 0 0.130232


" + "\n", + "
\n", + "\n", + "\n" ], "text/plain": [ "" @@ -204,155 +287,100 @@ "name": "stderr", "output_type": "stream", "text": [ - "UserWarning: cpus_per_actor is set to less than 2. Distributed LightGBM needs at least 2 CPUs per actor to train efficiently. This may lead to a degradation of performance during training.\n", - "\u001b[2m\u001b[36m(pid=1491578)\u001b[0m /home/ubuntu/ray/venv/lib/python3.8/site-packages/dask/dataframe/backends.py:181: FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n", - "\u001b[2m\u001b[36m(pid=1491578)\u001b[0m _numeric_index_types = (pd.Int64Index, pd.Float64Index, pd.UInt64Index)\n", - "\u001b[2m\u001b[36m(pid=1491578)\u001b[0m /home/ubuntu/ray/venv/lib/python3.8/site-packages/dask/dataframe/backends.py:181: FutureWarning: pandas.Float64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n", - "\u001b[2m\u001b[36m(pid=1491578)\u001b[0m _numeric_index_types = (pd.Int64Index, pd.Float64Index, pd.UInt64Index)\n", - "\u001b[2m\u001b[36m(pid=1491578)\u001b[0m /home/ubuntu/ray/venv/lib/python3.8/site-packages/dask/dataframe/backends.py:181: FutureWarning: pandas.UInt64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n", - "\u001b[2m\u001b[36m(pid=1491578)\u001b[0m _numeric_index_types = (pd.Int64Index, pd.Float64Index, pd.UInt64Index)\n", - "\u001b[2m\u001b[36m(pid=1491578)\u001b[0m /home/ubuntu/ray/venv/lib/python3.8/site-packages/xgboost/compat.py:31: FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n", - "\u001b[2m\u001b[36m(pid=1491578)\u001b[0m from pandas import MultiIndex, Int64Index\n", - "\u001b[2m\u001b[36m(LightGBMTrainer pid=1491578)\u001b[0m UserWarning: Dataset 'train' has 1 blocks, which is less than the `num_workers` 2. This dataset will be automatically repartitioned to 2 blocks.\n", - "\u001b[2m\u001b[36m(LightGBMTrainer pid=1491578)\u001b[0m UserWarning: Dataset 'valid' has 1 blocks, which is less than the `num_workers` 2. This dataset will be automatically repartitioned to 2 blocks.\n", - "\u001b[2m\u001b[36m(LightGBMTrainer pid=1491578)\u001b[0m UserWarning: cpus_per_actor is set to less than 2. Distributed LightGBM needs at least 2 CPUs per actor to train efficiently. This may lead to a degradation of performance during training.\n", - "\u001b[2m\u001b[36m(pid=1491651)\u001b[0m /home/ubuntu/ray/venv/lib/python3.8/site-packages/xgboost/compat.py:31: FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n", - "\u001b[2m\u001b[36m(pid=1491651)\u001b[0m from pandas import MultiIndex, Int64Index\n", - "\u001b[2m\u001b[36m(pid=1491651)\u001b[0m FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n", - "\u001b[2m\u001b[36m(pid=1491651)\u001b[0m FutureWarning: pandas.Float64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n", - "\u001b[2m\u001b[36m(pid=1491651)\u001b[0m FutureWarning: pandas.UInt64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n", - "\u001b[2m\u001b[36m(pid=1491653)\u001b[0m /home/ubuntu/ray/venv/lib/python3.8/site-packages/dask/dataframe/backends.py:181: FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n", - "\u001b[2m\u001b[36m(pid=1491653)\u001b[0m _numeric_index_types = (pd.Int64Index, pd.Float64Index, pd.UInt64Index)\n", - "\u001b[2m\u001b[36m(pid=1491653)\u001b[0m /home/ubuntu/ray/venv/lib/python3.8/site-packages/dask/dataframe/backends.py:181: FutureWarning: pandas.Float64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n", - "\u001b[2m\u001b[36m(pid=1491653)\u001b[0m _numeric_index_types = (pd.Int64Index, pd.Float64Index, pd.UInt64Index)\n", - "\u001b[2m\u001b[36m(pid=1491653)\u001b[0m /home/ubuntu/ray/venv/lib/python3.8/site-packages/dask/dataframe/backends.py:181: FutureWarning: pandas.UInt64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n", - "\u001b[2m\u001b[36m(pid=1491653)\u001b[0m _numeric_index_types = (pd.Int64Index, pd.Float64Index, pd.UInt64Index)\n", - "\u001b[2m\u001b[36m(pid=1491653)\u001b[0m /home/ubuntu/ray/venv/lib/python3.8/site-packages/xgboost/compat.py:31: FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n", - "\u001b[2m\u001b[36m(pid=1491653)\u001b[0m from pandas import MultiIndex, Int64Index\n", - "\u001b[2m\u001b[36m(pid=1491652)\u001b[0m /home/ubuntu/ray/venv/lib/python3.8/site-packages/dask/dataframe/backends.py:181: FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n", - "\u001b[2m\u001b[36m(pid=1491652)\u001b[0m _numeric_index_types = (pd.Int64Index, pd.Float64Index, pd.UInt64Index)\n", - "\u001b[2m\u001b[36m(pid=1491652)\u001b[0m /home/ubuntu/ray/venv/lib/python3.8/site-packages/dask/dataframe/backends.py:181: FutureWarning: pandas.Float64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n", - "\u001b[2m\u001b[36m(pid=1491652)\u001b[0m _numeric_index_types = (pd.Int64Index, pd.Float64Index, pd.UInt64Index)\n", - "\u001b[2m\u001b[36m(pid=1491652)\u001b[0m /home/ubuntu/ray/venv/lib/python3.8/site-packages/dask/dataframe/backends.py:181: FutureWarning: pandas.UInt64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n", - "\u001b[2m\u001b[36m(pid=1491652)\u001b[0m _numeric_index_types = (pd.Int64Index, pd.Float64Index, pd.UInt64Index)\n", - "\u001b[2m\u001b[36m(pid=1491652)\u001b[0m /home/ubuntu/ray/venv/lib/python3.8/site-packages/xgboost/compat.py:31: FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n", - "\u001b[2m\u001b[36m(pid=1491652)\u001b[0m from pandas import MultiIndex, Int64Index\n", - "\u001b[2m\u001b[36m(_RemoteRayLightGBMActor pid=1491653)\u001b[0m 2022-06-22 17:26:50,509\tWARNING __init__.py:190 -- DeprecationWarning: `ray.worker.get_resource_ids` is a private attribute and access will be removed in a future Ray version.\n", - "\u001b[2m\u001b[36m(_RemoteRayLightGBMActor pid=1491652)\u001b[0m 2022-06-22 17:26:50,658\tWARNING __init__.py:190 -- DeprecationWarning: `ray.worker.get_resource_ids` is a private attribute and access will be removed in a future Ray version.\n" + "\u001b[2m\u001b[36m(LightGBMTrainer pid=10027)\u001b[0m The `preprocessor` arg to Trainer is deprecated. Apply preprocessor transformations ahead of time by calling `preprocessor.transform(ds)`. Support for the preprocessor arg will be dropped in a future release.\n", + "\u001b[2m\u001b[36m(LightGBMTrainer pid=10027)\u001b[0m Executing DAG InputDataBuffer[Input] -> TaskPoolMapOperator[MapBatches(get_pd_value_counts)]\n", + "\u001b[2m\u001b[36m(LightGBMTrainer pid=10027)\u001b[0m Execution config: ExecutionOptions(resource_limits=ExecutionResources(cpu=None, gpu=None, object_store_memory=None), locality_with_output=False, preserve_order=False, actor_locality_enabled=True, verbose_progress=False)\n", + "\u001b[2m\u001b[36m(LightGBMTrainer pid=10027)\u001b[0m Tip: For detailed progress reporting, run `ray.data.DataContext.get_current().execution_options.verbose_progress = True`\n", + "\u001b[2m\u001b[36m(LightGBMTrainer pid=10027)\u001b[0m Tip: Use `take_batch()` instead of `take() / show()` to return records in pandas or numpy batch format.\n", + "\u001b[2m\u001b[36m(LightGBMTrainer pid=10027)\u001b[0m Executing DAG InputDataBuffer[Input] -> TaskPoolMapOperator[MapBatches(Categorizer._transform_pandas)] -> AllToAllOperator[Aggregate]\n", + "\u001b[2m\u001b[36m(LightGBMTrainer pid=10027)\u001b[0m Execution config: ExecutionOptions(resource_limits=ExecutionResources(cpu=None, gpu=None, object_store_memory=None), locality_with_output=False, preserve_order=False, actor_locality_enabled=True, verbose_progress=False)\n", + "\u001b[2m\u001b[36m(LightGBMTrainer pid=10027)\u001b[0m Tip: For detailed progress reporting, run `ray.data.DataContext.get_current().execution_options.verbose_progress = True`\n", + " \n", + "\u001b[A\n", + "\u001b[A\n", + "\n", + "\u001b[A\u001b[A\n", + "\n", + "(pid=10027) Running: 0.0/10.0 CPU, 0.0/0.0 GPU, 0.0 MiB/512.0 MiB object_store_memory: 0%| | 0/14 [00:00 TaskPoolMapOperator[MapBatches(Categorizer._transform_pandas)->MapBatches(StandardScaler._transform_pandas)]\n", + "\n", + "\u001b[A\n", + "\n", + "(pid=10027) Running: 0.0/10.0 CPU, 0.0/0.0 GPU, 0.0 MiB/512.0 MiB object_store_memory: 7%|▋ | 1/14 [00:00<00:01, 7.59it/s]\n", + "\u001b[A \n", + "\n", + "\u001b[A\u001b[A \n", + "\n", + "\n", + "\u001b[2m\u001b[36m(LightGBMTrainer pid=10027)\u001b[0m Execution config: ExecutionOptions(resource_limits=ExecutionResources(cpu=None, gpu=None, object_store_memory=None), locality_with_output=False, preserve_order=False, actor_locality_enabled=True, verbose_progress=False)\n", + "\n", + "\u001b[A\n", + "\n", + "(pid=10027) Running: 0.0/10.0 CPU, 0.0/0.0 GPU, 0.0 MiB/512.0 MiB object_store_memory: 7%|▋ | 1/14 [00:00<00:01, 6.59it/s]\n", + "\u001b[A \n", + "\n", + "\u001b[A\u001b[A \n", + "\n", + "\n", + "\u001b[2m\u001b[36m(LightGBMTrainer pid=10027)\u001b[0m Tip: For detailed progress reporting, run `ray.data.DataContext.get_current().execution_options.verbose_progress = True`\n", + "\n", + "\u001b[A\n", + "\n", + " \n", + "\u001b[A\n", + "\n", + "\u001b[A\u001b[A\n", + "\n", + "\u001b[2m\u001b[36m(LightGBMTrainer pid=10027)\u001b[0m Executing DAG InputDataBuffer[Input] -> TaskPoolMapOperator[MapBatches(Categorizer._transform_pandas)->MapBatches(StandardScaler._transform_pandas)]\n", + "\u001b[2m\u001b[36m(LightGBMTrainer pid=10027)\u001b[0m Execution config: ExecutionOptions(resource_limits=ExecutionResources(cpu=None, gpu=None, object_store_memory=None), locality_with_output=False, preserve_order=False, actor_locality_enabled=True, verbose_progress=False)\n", + "\u001b[2m\u001b[36m(LightGBMTrainer pid=10027)\u001b[0m Tip: For detailed progress reporting, run `ray.data.DataContext.get_current().execution_options.verbose_progress = True`\n", + " \r" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "\u001b[2m\u001b[36m(_RemoteRayLightGBMActor pid=1491653)\u001b[0m [LightGBM] [Info] Trying to bind port 59039...\n", - "\u001b[2m\u001b[36m(_RemoteRayLightGBMActor pid=1491653)\u001b[0m [LightGBM] [Info] Binding port 59039 succeeded\n", - "\u001b[2m\u001b[36m(_RemoteRayLightGBMActor pid=1491653)\u001b[0m [LightGBM] [Info] Listening...\n", - "\u001b[2m\u001b[36m(_RemoteRayLightGBMActor pid=1491652)\u001b[0m [LightGBM] [Info] Trying to bind port 46955...\n", - "\u001b[2m\u001b[36m(_RemoteRayLightGBMActor pid=1491652)\u001b[0m [LightGBM] [Info] Binding port 46955 succeeded\n", - "\u001b[2m\u001b[36m(_RemoteRayLightGBMActor pid=1491652)\u001b[0m [LightGBM] [Info] Listening...\n", - "\u001b[2m\u001b[36m(_RemoteRayLightGBMActor pid=1491652)\u001b[0m [LightGBM] [Warning] Connecting to rank 1 failed, waiting for 200 milliseconds\n" + "\u001b[2m\u001b[36m(_RemoteRayLightGBMActor pid=10063)\u001b[0m [LightGBM] [Info] Trying to bind port 51134...\n", + "\u001b[2m\u001b[36m(_RemoteRayLightGBMActor pid=10063)\u001b[0m [LightGBM] [Info] Binding port 51134 succeeded\n", + "\u001b[2m\u001b[36m(_RemoteRayLightGBMActor pid=10063)\u001b[0m [LightGBM] [Info] Listening...\n", + "\u001b[2m\u001b[36m(_RemoteRayLightGBMActor pid=10062)\u001b[0m [LightGBM] [Warning] Connecting to rank 1 failed, waiting for 200 milliseconds\n", + "\u001b[2m\u001b[36m(_RemoteRayLightGBMActor pid=10063)\u001b[0m [LightGBM] [Info] Connected to rank 0\n", + "\u001b[2m\u001b[36m(_RemoteRayLightGBMActor pid=10063)\u001b[0m [LightGBM] [Info] Local rank: 1, total number of machines: 2\n", + "\u001b[2m\u001b[36m(_RemoteRayLightGBMActor pid=10063)\u001b[0m [LightGBM] [Warning] num_threads is set=2, n_jobs=-1 will be ignored. Current value: num_threads=2\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "\u001b[2m\u001b[36m(_RemoteRayLightGBMActor pid=1491653)\u001b[0m UserWarning: Overriding the parameters from Reference Dataset.\n", - "\u001b[2m\u001b[36m(_RemoteRayLightGBMActor pid=1491653)\u001b[0m UserWarning: categorical_column in param dict is overridden.\n", - "\u001b[2m\u001b[36m(_RemoteRayLightGBMActor pid=1491652)\u001b[0m UserWarning: Overriding the parameters from Reference Dataset.\n", - "\u001b[2m\u001b[36m(_RemoteRayLightGBMActor pid=1491652)\u001b[0m UserWarning: categorical_column in param dict is overridden.\n" + "\u001b[2m\u001b[36m(_RemoteRayLightGBMActor pid=10062)\u001b[0m /Users/balaji/Documents/GitHub/ray/.venv/lib/python3.11/site-packages/lightgbm/basic.py:1780: UserWarning: Overriding the parameters from Reference Dataset.\n", + "\u001b[2m\u001b[36m(_RemoteRayLightGBMActor pid=10062)\u001b[0m _log_warning('Overriding the parameters from Reference Dataset.')\n", + "\u001b[2m\u001b[36m(_RemoteRayLightGBMActor pid=10062)\u001b[0m /Users/balaji/Documents/GitHub/ray/.venv/lib/python3.11/site-packages/lightgbm/basic.py:1513: UserWarning: categorical_column in param dict is overridden.\n", + "\u001b[2m\u001b[36m(_RemoteRayLightGBMActor pid=10062)\u001b[0m _log_warning(f'{cat_alias} in param dict is overridden.')\n", + "2023-07-07 14:34:34,087\tINFO tune.py:1148 -- Total run time: 7.18 seconds (6.05 seconds for the tuning loop).\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "\u001b[2m\u001b[36m(_RemoteRayLightGBMActor pid=1491653)\u001b[0m [LightGBM] [Info] Connected to rank 0\n", - "\u001b[2m\u001b[36m(_RemoteRayLightGBMActor pid=1491653)\u001b[0m [LightGBM] [Info] Local rank: 1, total number of machines: 2\n", - "\u001b[2m\u001b[36m(_RemoteRayLightGBMActor pid=1491653)\u001b[0m [LightGBM] [Warning] num_threads is set=1, n_jobs=-1 will be ignored. Current value: num_threads=1\n", - "\u001b[2m\u001b[36m(_RemoteRayLightGBMActor pid=1491652)\u001b[0m [LightGBM] [Info] Connected to rank 1\n", - "\u001b[2m\u001b[36m(_RemoteRayLightGBMActor pid=1491652)\u001b[0m [LightGBM] [Info] Local rank: 0, total number of machines: 2\n", - "\u001b[2m\u001b[36m(_RemoteRayLightGBMActor pid=1491652)\u001b[0m [LightGBM] [Warning] num_threads is set=1, n_jobs=-1 will be ignored. Current value: num_threads=1\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[2m\u001b[36m(_QueueActor pid=1491650)\u001b[0m /home/ubuntu/ray/venv/lib/python3.8/site-packages/dask/dataframe/backends.py:181: FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n", - "\u001b[2m\u001b[36m(_QueueActor pid=1491650)\u001b[0m _numeric_index_types = (pd.Int64Index, pd.Float64Index, pd.UInt64Index)\n", - "\u001b[2m\u001b[36m(_QueueActor pid=1491650)\u001b[0m /home/ubuntu/ray/venv/lib/python3.8/site-packages/dask/dataframe/backends.py:181: FutureWarning: pandas.Float64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n", - "\u001b[2m\u001b[36m(_QueueActor pid=1491650)\u001b[0m _numeric_index_types = (pd.Int64Index, pd.Float64Index, pd.UInt64Index)\n", - "\u001b[2m\u001b[36m(_QueueActor pid=1491650)\u001b[0m /home/ubuntu/ray/venv/lib/python3.8/site-packages/dask/dataframe/backends.py:181: FutureWarning: pandas.UInt64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n", - "\u001b[2m\u001b[36m(_QueueActor pid=1491650)\u001b[0m _numeric_index_types = (pd.Int64Index, pd.Float64Index, pd.UInt64Index)\n", - "\u001b[2m\u001b[36m(_QueueActor pid=1491650)\u001b[0m /home/ubuntu/ray/venv/lib/python3.8/site-packages/xgboost/compat.py:31: FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n", - "\u001b[2m\u001b[36m(_QueueActor pid=1491650)\u001b[0m from pandas import MultiIndex, Int64Index\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Result for LightGBMTrainer_7b049_00000:\n", - " date: 2022-06-22_17-26-53\n", - " done: false\n", - " experiment_id: b4a87c26a7604a43baf895755d4f16b3\n", - " hostname: ip-172-31-43-110\n", - " iterations_since_restore: 1\n", - " node_ip: 172.31.43.110\n", - " pid: 1491578\n", - " should_checkpoint: true\n", - " time_since_restore: 8.369545459747314\n", - " time_this_iter_s: 8.369545459747314\n", - " time_total_s: 8.369545459747314\n", - " timestamp: 1655918813\n", - " timesteps_since_restore: 0\n", - " train-binary_error: 0.5175879396984925\n", - " train-binary_logloss: 0.6302848981539763\n", - " training_iteration: 1\n", - " trial_id: 7b049_00000\n", - " valid-binary_error: 0.2\n", - " valid-binary_logloss: 0.558752017793943\n", - " warmup_time: 0.008721590042114258\n", - " \n", - "Result for LightGBMTrainer_7b049_00000:\n", - " date: 2022-06-22_17-26-56\n", - " done: true\n", - " experiment_id: b4a87c26a7604a43baf895755d4f16b3\n", - " experiment_tag: '0'\n", - " hostname: ip-172-31-43-110\n", - " iterations_since_restore: 100\n", - " node_ip: 172.31.43.110\n", - " pid: 1491578\n", - " should_checkpoint: true\n", - " time_since_restore: 10.972588300704956\n", - " time_this_iter_s: 0.027977466583251953\n", - " time_total_s: 10.972588300704956\n", - " timestamp: 1655918816\n", - " timesteps_since_restore: 0\n", - " train-binary_error: 0.0\n", - " train-binary_logloss: 0.0005745220956391456\n", - " training_iteration: 100\n", - " trial_id: 7b049_00000\n", - " valid-binary_error: 0.058823529411764705\n", - " valid-binary_logloss: 0.17189847605331432\n", - " warmup_time: 0.008721590042114258\n", - " \n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2022-06-22 17:26:56,406\tINFO tune.py:734 -- Total run time: 14.73 seconds (14.06 seconds for the tuning loop).\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'train-binary_logloss': 0.0005745220956391456, 'train-binary_error': 0.0, 'valid-binary_logloss': 0.17189847605331432, 'valid-binary_error': 0.058823529411764705, 'time_this_iter_s': 0.027977466583251953, 'should_checkpoint': True, 'done': True, 'timesteps_total': None, 'episodes_total': None, 'training_iteration': 100, 'trial_id': '7b049_00000', 'experiment_id': 'b4a87c26a7604a43baf895755d4f16b3', 'date': '2022-06-22_17-26-56', 'timestamp': 1655918816, 'time_total_s': 10.972588300704956, 'pid': 1491578, 'hostname': 'ip-172-31-43-110', 'node_ip': '172.31.43.110', 'config': {}, 'time_since_restore': 10.972588300704956, 'timesteps_since_restore': 0, 'iterations_since_restore': 100, 'warmup_time': 0.008721590042114258, 'experiment_tag': '0'}\n" + "{'train-binary_logloss': 0.00020229312743896637, 'train-binary_error': 0.0, 'valid-binary_logloss': 0.13023245107091222, 'valid-binary_error': 0.023529411764705882, 'time_this_iter_s': 0.021785974502563477, 'should_checkpoint': True, 'done': True, 'training_iteration': 101, 'trial_id': '0c5ae_00000', 'date': '2023-07-07_14-34-34', 'timestamp': 1688765674, 'time_total_s': 4.582904100418091, 'pid': 10027, 'hostname': 'Balajis-MacBook-Pro-16', 'node_ip': '127.0.0.1', 'config': {}, 'time_since_restore': 4.582904100418091, 'iterations_since_restore': 101, 'experiment_tag': '0'}\n" ] } ], @@ -361,6 +389,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "d7155d9b", "metadata": {}, @@ -370,7 +399,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 7, "id": "871c9be6", "metadata": {}, "outputs": [ @@ -378,25 +407,33 @@ "name": "stderr", "output_type": "stream", "text": [ - "2022-06-22 17:26:57,517\tWARNING read_api.py:260 -- The number of blocks in this dataset (1) limits its parallelism to 1 concurrent tasks. This is much less than the number of available CPU slots in the cluster. Use `.repartition(n)` to increase the number of dataset blocks.\n", - "Map_Batches: 100%|██████████| 1/1 [00:00<00:00, 50.96it/s]\n", - "Map_Batches: 0%| | 0/1 [00:00 ActorPoolMapOperator[MapBatches()->MapBatches(Predict)] -> TaskPoolMapOperator[MapBatches()]\n", + "2023-07-07 14:34:38,674\tINFO streaming_executor.py:93 -- Execution config: ExecutionOptions(resource_limits=ExecutionResources(cpu=None, gpu=None, object_store_memory=None), locality_with_output=False, preserve_order=False, actor_locality_enabled=True, verbose_progress=False)\n", + "2023-07-07 14:34:38,676\tINFO streaming_executor.py:95 -- Tip: For detailed progress reporting, run `ray.data.DataContext.get_current().execution_options.verbose_progress = True`\n", + "2023-07-07 14:34:38,701\tINFO actor_pool_map_operator.py:117 -- MapBatches()->MapBatches(Predict): Waiting for 1 pool actors to start...\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "PREDICTED LABELS\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " " ] }, { "name": "stdout", "output_type": "stream", "text": [ - "PREDICTED LABELS\n", "{'predictions': 1}\n", "{'predictions': 1}\n", "{'predictions': 0}\n", @@ -423,64 +460,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "Map_Batches: 0%| | 0/1 [00:00\n", - "
\n", - "

Ray

\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "\n", - "\n", - "
Python version:3.9.15
Ray version: 2.6.0
Dashboard:http://console.anyscale.com/api/v2/sessions/ses_jgkdnu2723aleytwqqhebr12vs/services?redirect_to=dashboard
\n", - "
\n", - "\n" - ], - "text/plain": [ - "RayContext(dashboard_url='console.anyscale.com/api/v2/sessions/ses_jgkdnu2723aleytwqqhebr12vs/services?redirect_to=dashboard', python_version='3.9.15', ray_version='2.6.0', ray_commit='17df2ef17983406bb178c251044c9dc654b378c0', address_info={'node_ip_address': '172.31.244.129', 'raylet_ip_address': '172.31.244.129', 'redis_address': None, 'object_store_address': '/tmp/ray/session_2023-04-22_11-09-11_790337_150/sockets/plasma_store', 'raylet_socket_name': '/tmp/ray/session_2023-04-22_11-09-11_790337_150/sockets/raylet', 'webui_url': 'console.anyscale.com/api/v2/sessions/ses_jgkdnu2723aleytwqqhebr12vs/services?redirect_to=dashboard', 'session_dir': '/tmp/ray/session_2023-04-22_11-09-11_790337_150', 'metrics_export_port': 61073, 'gcs_address': '172.31.244.129:9031', 'address': '172.31.244.129:9031', 'dashboard_agent_listen_port': 52365, 'node_id': 'e6e9dfeda4469dd816c080bec2cf1cd12abdd978ae74b87e869164eb'})" - ] - }, - "execution_count": 1, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import ray\n", - "\n", - "ray.init(\n", - " runtime_env={\n", - " \"pip\": [\n", - " \"numpy==1.23\",\n", - " \"protobuf==3.20.0\",\n", - " \"transformers==4.27.2\",\n", - " \"accelerate==0.17.1\",\n", - " \"deepspeed==0.8.3\",\n", - " ],\n", - " \"env_vars\": {\n", - " \"HF_HUB_DISABLE_PROGRESS_BARS\": \"1\",\n", - " }\n", - " }\n", - ")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "b619a878", - "metadata": {}, - "source": [ - "## Define Hyperparameters\n", - "\n", - "Define a list of hyperparameters as a global dataclass.\n", - "\n", - "Refer to https://deepspeed.readthedocs.io/en/stable/inference-init.html#deepspeed.inference.config.DeepSpeedInferenceConfig for more details about the configurations of a DeepSpeed inference job." - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "613df744", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "from dataclasses import dataclass\n", - "from typing import Optional\n", - "\n", - "\n", - "@dataclass\n", - "class Config:\n", - " model_name: str = \"facebook/opt-30b\"\n", - " # Path to HuggingFace cache directory. Default is ~/.cache/huggingface/.\n", - " cache_dir: Optional[str] = None\n", - " # Path to the directory that actually holds model files.\n", - " # e.g., ~/.cache/huggingface/models--facebook--opt-30b/snapshots/xxx/\n", - " # If this path is not None, we skip download models from HuggingFace.\n", - " repo_root: Optional[str] = None\n", - " # This is how many DeepSpeed-inference replicas to run for\n", - " # this batch inference job.\n", - " num_worker_groups: int = 1\n", - " # Number of DeepSpeed workers per group.\n", - " num_workers_per_group: int = 8\n", - "\n", - " batch_size: int = 1\n", - " dtype: str = \"float16\"\n", - " # Maximum number of tokens DeepSpeed inference-engine can work with,\n", - " # including the input and output tokens.\n", - " max_tokens: int = 1024\n", - " # Use meta tensors to initialize model.\n", - " use_meta_tensor: bool = True\n", - " # Use cache for generation.\n", - " use_cache: bool = True\n", - " # The path for which we want to save the loaded model with a checkpoint.\n", - " save_mp_checkpoint_path: Optional[str] = None\n", - "\n", - "\n", - "config = Config()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "28df05bf", - "metadata": {}, - "source": [ - "## Download and Cache Model\n", - "\n", - "Next, we will download and cache model files on all instances of the cluster before we run the job.\n", - "\n", - "Notice that when we download model snapshots from HuggingFace, we skip files that end with safetensors, msgpack, and h5 extensions. These are Tensorflow and JAX weight files. We only need PyTorch weights for this example.\n", - "\n", - "We execute the ``download_model()`` function on every node of the cluster by using a ``NodeAffinitySchedulingStrategy`` from Ray Core." - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "63b8a84d-57a6-4430-8fe8-9811760b8b7c", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Caching model locally ...\n", - "Done. Model saved in /home/ray/.cache/huggingface/hub/models--facebook--opt-30b/snapshots/ceea0a90ac0f6fae7c2c34bcb40477438c152546\n" - ] - } - ], - "source": [ - "\n", - "from huggingface_hub import snapshot_download\n", - "import ray\n", - "from ray.util.scheduling_strategies import NodeAffinitySchedulingStrategy\n", - "\n", - "\n", - "@ray.remote\n", - "def download_model(config: Config):\n", - " # This function downloads the specified HF model into a local directory.\n", - " # This can also download models from cloud storages like S3.\n", - " return snapshot_download(\n", - " repo_id=config.model_name,\n", - " cache_dir=config.cache_dir,\n", - " allow_patterns=[\"*\"],\n", - " # Skip downloading TF and FLAX weight files.\n", - " ignore_patterns=[\"*.safetensors\", \"*.msgpack\", \"*.h5\"],\n", - " revision=None,\n", - " )\n", - "\n", - "if config.repo_root is None:\n", - " # Download model files to all GPU nodes, and set correct repo_root.\n", - " refs = []\n", - " for node in ray.nodes():\n", - " if node[\"Alive\"] and node[\"Resources\"].get(\"GPU\", None):\n", - " node_id = node[\"NodeID\"]\n", - " scheduling_strategy = NodeAffinitySchedulingStrategy(\n", - " node_id=node_id, soft=False\n", - " )\n", - " options = {\"scheduling_strategy\": scheduling_strategy}\n", - " refs.append(\n", - " download_model.options(scheduling_strategy=scheduling_strategy).remote(config)\n", - " )\n", - "\n", - " print(\"Caching model locally ...\")\n", - "\n", - " # Wait for models to finish downloading.\n", - " config.repo_root = ray.get(refs)[0]\n", - "\n", - " print(f\"Done. Model saved in {config.repo_root}\")\n", - "else:\n", - " print(f\"Using existing model saved in {config.repo_root}\")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "6b14b7d9", - "metadata": {}, - "source": [ - "## Define DeepSpeed Utility Classes\n", - "\n", - "Next, we define a few utility classes and functions that are useful for setting up and running the DeepSpeed inference job.\n", - "\n", - "Note that the Pipeline is modeled after https://github.com/microsoft/DeepSpeedExamples/tree/efacebb3ddbea86bb20c3af30fd060be0fa41ac8/inference/huggingface/text-generation." - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "f9aad2a9", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/ray/anaconda3/lib/python3.9/site-packages/xgboost/compat.py:31: FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n", - " from pandas import MultiIndex, Int64Index\n" - ] - } - ], - "source": [ - "import gc\n", - "import io\n", - "import json\n", - "import math\n", - "import os\n", - "from pathlib import Path\n", - "from typing import List\n", - "\n", - "import deepspeed\n", - "import torch\n", - "from deepspeed.runtime.utils import see_memory_usage\n", - "from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer\n", - "\n", - "\n", - "class DSPipeline:\n", - " \"\"\"\n", - " Example helper class for comprehending DeepSpeed Meta Tensors, meant to mimic HF pipelines.\n", - " The DSPipeline can run with and without meta tensors.\n", - " \"\"\"\n", - "\n", - " def __init__(\n", - " self,\n", - " model_name,\n", - " dtype=torch.float16,\n", - " is_meta=True,\n", - " device=-1,\n", - " repo_root=None,\n", - " ):\n", - " self.model_name = model_name\n", - " self.dtype = dtype\n", - "\n", - " if isinstance(device, torch.device):\n", - " self.device = device\n", - " elif isinstance(device, str):\n", - " self.device = torch.device(device)\n", - " elif device < 0:\n", - " self.device = torch.device(\"cpu\")\n", - " else:\n", - " self.device = torch.device(f\"cuda:{device}\")\n", - "\n", - " self.tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side=\"right\")\n", - " self.tokenizer.pad_token = self.tokenizer.eos_token\n", - "\n", - " if is_meta:\n", - " # When meta tensors enabled, use checkpoints\n", - " self.config = AutoConfig.from_pretrained(self.model_name)\n", - " self.checkpoints_json = self._generate_json(repo_root)\n", - "\n", - " with deepspeed.OnDevice(dtype=dtype, device=\"meta\"):\n", - " self.model = AutoModelForCausalLM.from_config(self.config)\n", - " else:\n", - " self.model = AutoModelForCausalLM.from_pretrained(self.model_name)\n", - "\n", - " self.model.eval()\n", - "\n", - " def __call__(self, inputs, **kwargs):\n", - " input_list = [inputs] if isinstance(inputs, str) else inputs\n", - " outputs = self.generate_outputs(input_list, **kwargs)\n", - " return outputs\n", - "\n", - " def _generate_json(self, repo_root):\n", - " if os.path.exists(os.path.join(repo_root, \"ds_inference_config.json\")):\n", - " # Simply use the available inference config.\n", - " return os.path.join(repo_root, \"ds_inference_config.json\")\n", - "\n", - " # Write a checkpoints config file in local directory.\n", - " checkpoints_json = \"checkpoints.json\"\n", - "\n", - " with io.open(checkpoints_json, \"w\", encoding=\"utf-8\") as f:\n", - " file_list = [\n", - " str(entry).split(\"/\")[-1]\n", - " for entry in Path(repo_root).rglob(\"*.[bp][it][n]\")\n", - " if entry.is_file()\n", - " ]\n", - " data = {\n", - " # Hardcode bloom for now.\n", - " # Possible choices are \"bloom\", \"ds_model\", \"Megatron\".\n", - " \"type\": \"bloom\",\n", - " \"checkpoints\": file_list,\n", - " \"version\": 1.0\n", - " }\n", - " json.dump(data, f)\n", - "\n", - " return checkpoints_json\n", - "\n", - " def generate_outputs(self, inputs, **generate_kwargs):\n", - " input_tokens = self.tokenizer.batch_encode_plus(\n", - " inputs, return_tensors=\"pt\", padding=True\n", - " )\n", - " for t in input_tokens:\n", - " if torch.is_tensor(input_tokens[t]):\n", - " input_tokens[t] = input_tokens[t].to(self.device)\n", - "\n", - " self.model.cuda().to(self.device)\n", - "\n", - " outputs = self.model.generate(**input_tokens, **generate_kwargs)\n", - " outputs = self.tokenizer.batch_decode(outputs, skip_special_tokens=True)\n", - "\n", - " return outputs\n", - "\n", - "\n", - "def _memory_usage(gpu_id: int, msg: str):\n", - " \"\"\"Print memory usage.\"\"\"\n", - " if gpu_id != 0:\n", - " return\n", - " see_memory_usage(msg, True)\n", - "\n", - "\n", - "def init_model(config: Config, world_size: int, gpu_id: int) -> DSPipeline:\n", - " \"\"\"Initialize the deepspeed model.\"\"\"\n", - " data_type = getattr(torch, config.dtype)\n", - "\n", - " _memory_usage(gpu_id, \"before init\")\n", - " pipe = DSPipeline(\n", - " model_name=config.model_name,\n", - " dtype=data_type,\n", - " is_meta=config.use_meta_tensor,\n", - " device=gpu_id,\n", - " repo_root=config.repo_root,\n", - " )\n", - " _memory_usage(gpu_id, \"after init\")\n", - "\n", - " if config.use_meta_tensor:\n", - " ds_kwargs = dict(\n", - " base_dir=config.repo_root, checkpoint=pipe.checkpoints_json\n", - " )\n", - " else:\n", - " ds_kwargs = dict()\n", - "\n", - " gc.collect()\n", - "\n", - " pipe.model = deepspeed.init_inference(\n", - " pipe.model,\n", - " dtype=data_type,\n", - " mp_size=world_size,\n", - " replace_with_kernel_inject=True,\n", - " replace_method=True,\n", - " max_tokens=config.max_tokens,\n", - " save_mp_checkpoint_path=config.save_mp_checkpoint_path,\n", - " **ds_kwargs,\n", - " )\n", - " _memory_usage(gpu_id, \"after init_inference\")\n", - "\n", - " return pipe\n", - "\n", - "\n", - "def generate(\n", - " input_sentences: List[str], pipe: DSPipeline, batch_size: int, **generate_kwargs\n", - ") -> List[str]:\n", - " \"\"\"Generate predictions using a DSPipeline.\"\"\"\n", - " if batch_size > len(input_sentences):\n", - " # Dynamically extend to support larger bs by repetition.\n", - " input_sentences *= math.ceil(batch_size / len(input_sentences))\n", - "\n", - " inputs = input_sentences[:batch_size]\n", - " outputs = pipe(inputs, **generate_kwargs)\n", - " return outputs" - ] - }, - { - "cell_type": "markdown", - "id": "bd20d4d9", - "metadata": {}, - "source": [] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "62eee91d", - "metadata": {}, - "source": [ - "## Define a DeepSpeed Predictor\n", - "\n", - "Define an AIR Predictor to be instantiated by the Dataset pipeline below.\n", - "\n", - "Each DeepSpeedPredictor is a stateful Ray actor that understands how to process the input prompt using a group of DeepSpeed inference workers.\n", - "\n", - "More specifically, each DeepSpeedPredictor sets up a proper PyTorch DDP process group before spinning up multiple PredictionWorkers. Since the model is loaded using the DeepSpeed inference framework, each PredictionWorker handles a shard of the entire DeepSpeed inference model.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "516a200d-14e4-4b52-a615-e09778ba4117", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "from typing import List\n", - "\n", - "import pandas as pd\n", - "import ray\n", - "import ray.util\n", - "from ray.air import Checkpoint, ScalingConfig\n", - "from ray.air.util.torch_dist import (\n", - " TorchDistributedWorker,\n", - " init_torch_dist_process_group,\n", - " shutdown_torch_dist_process_group,\n", - ")\n", - "from ray.train.predictor import Predictor\n", - "from ray.util.scheduling_strategies import PlacementGroupSchedulingStrategy\n", - "\n", - "\n", - "@ray.remote\n", - "class PredictionWorker(TorchDistributedWorker):\n", - " \"\"\"A PredictionWorker is a Ray remote actor that runs a single shard of a DeepSpeed job.\n", - " \n", - " Multiple PredictionWorkers of the same WorkerGroup form a PyTorch DDP process\n", - " group and work together under the orchestration of DeepSpeed.\n", - " \"\"\"\n", - " def __init__(self, config: Config, world_size: int):\n", - " self.config = config\n", - " self.world_size = world_size\n", - "\n", - " def init_model(self, local_rank: int):\n", - " \"\"\"Initialize model for inference.\"\"\"\n", - " # Note: We have to provide the local_rank that was used to initiate\n", - " # the DDP process group here. e.g., a PredictionWorker may be the\n", - " # rank 0 worker of a group, but occupies gpu 7.\n", - " self.generator = init_model(self.config, self.world_size, local_rank)\n", - "\n", - " def generate(self, data: pd.DataFrame, column: str, **kwargs) -> List[str]:\n", - " return generate(\n", - " list(data[column]), self.generator, self.config.batch_size, **kwargs\n", - " )\n", - "\n", - "\n", - "# TODO: This Predictor should be part of Ray AIR.\n", - "class DeepSpeedPredictor(Predictor):\n", - " def __init__(self, checkpoint: Checkpoint, scaling_config: ScalingConfig) -> None:\n", - " self.checkpoint = checkpoint\n", - " self.scaling_config = scaling_config\n", - " self.init_worker_group(scaling_config)\n", - "\n", - " def __del__(self):\n", - " shutdown_torch_dist_process_group(self.prediction_workers)\n", - "\n", - " def init_worker_group(self, scaling_config: ScalingConfig):\n", - " \"\"\"Create the worker group.\n", - "\n", - " Each worker in the group communicates with other workers through the\n", - " torch distributed backend. The worker group is inelastic (a failure of\n", - " one worker destroys the entire group). Each worker in the group\n", - " recieves the same input data and outputs the same generated text.\n", - " \"\"\"\n", - " config = self.checkpoint.to_dict()[\"config\"]\n", - "\n", - " # Start a placement group for the workers.\n", - " self.pg = scaling_config.as_placement_group_factory().to_placement_group()\n", - " prediction_worker_cls = PredictionWorker.options(\n", - " num_cpus=scaling_config.num_cpus_per_worker,\n", - " num_gpus=scaling_config.num_gpus_per_worker,\n", - " resources=scaling_config.additional_resources_per_worker,\n", - " scheduling_strategy=PlacementGroupSchedulingStrategy(\n", - " placement_group=self.pg, placement_group_capture_child_tasks=True\n", - " ),\n", - " )\n", - " # Create the prediction workers.\n", - " self.prediction_workers = [\n", - " prediction_worker_cls.remote(config, scaling_config.num_workers)\n", - " for i in range(scaling_config.num_workers)\n", - " ]\n", - "\n", - " # Initialize torch distributed process group for the workers.\n", - " local_ranks = init_torch_dist_process_group(self.prediction_workers, backend=\"nccl\")\n", - "\n", - " # Initialize the model on each worker.\n", - " ray.get([\n", - " worker.init_model.remote(local_rank)\n", - " for worker, local_rank in zip(self.prediction_workers, local_ranks)\n", - " ])\n", - "\n", - " def _predict_pandas(\n", - " self,\n", - " data: pd.DataFrame,\n", - " input_column: str = \"prompt\",\n", - " output_column: str = \"output\",\n", - " **kwargs\n", - " ) -> pd.DataFrame:\n", - " data_ref = ray.put(data)\n", - " prediction = ray.get(\n", - " [\n", - " worker.generate.remote(data_ref, column=input_column, **kwargs)\n", - " for worker in self.prediction_workers\n", - " ]\n", - " )[0]\n", - "\n", - " return pd.DataFrame(prediction, columns=[output_column])\n", - "\n", - " @classmethod\n", - " def from_checkpoint(cls, checkpoint: Checkpoint, **kwargs) -> \"Predictor\":\n", - " return cls(checkpoint=checkpoint, **kwargs)\n" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "ca57e150", - "metadata": {}, - "source": [ - "## Create a Dataset Pipeline\n", - "\n", - "Finally, we connect all these pieces together, and use a BatchPredictor to run multiple copies of the DeepSpeedPredictor actors.\n", - "\n", - "This step helps parallelize our batch inference job and utilize all available resources in the cluster." - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "48bf4a4f-0ac4-4e77-a05a-710d42e0dc4e", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-04-22 11:14:12,074\tWARNING dataset.py:4124 -- Deprecation warning: use Dataset.materialize() instead of fully_executed().\n", - "2023-04-22 11:14:12,079\tINFO streaming_executor.py:87 -- Executing DAG InputDataBuffer[Input] -> AllToAllOperator[Repartition] -> AllToAllOperator[RandomShuffle]\n", - "2023-04-22 11:14:12,081\tINFO streaming_executor.py:88 -- Execution config: ExecutionOptions(resource_limits=ExecutionResources(cpu=None, gpu=None, object_store_memory=None), locality_with_output=False, preserve_order=False, actor_locality_enabled=True, verbose_progress=False)\n", - "2023-04-22 11:14:12,082\tINFO streaming_executor.py:90 -- Tip: To enable per-operator progress reporting, set RAY_DATA_VERBOSE_PROGRESS=1.\n" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "- Repartition 1: 0%| | 0/16 [00:00 ActorPoolMapOperator[MapBatches(ScoringWrapper)]\n", - "2023-04-22 11:14:12,682\tINFO streaming_executor.py:88 -- Execution config: ExecutionOptions(resource_limits=ExecutionResources(cpu=None, gpu=None, object_store_memory=None), locality_with_output=False, preserve_order=False, actor_locality_enabled=True, verbose_progress=False)\n", - "2023-04-22 11:14:12,683\tINFO streaming_executor.py:90 -- Tip: To enable per-operator progress reporting, set RAY_DATA_VERBOSE_PROGRESS=1.\n", - "2023-04-22 11:14:12,785\tINFO actor_pool_map_operator.py:114 -- MapBatches(ScoringWrapper): Waiting for 1 pool actors to start...\n", - "(_MapWorker pid=7005) The cache for model files in Transformers v4.22.0 has been updated. Migrating your old cache. This is a one-time only operation. You can interrupt this and resume the migration later on by calling `transformers.utils.move_cache()`.\n", - "0it [00:00, ?it/s]05) \n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "(PredictionWorker pid=10038) [2023-04-22 11:14:30,762] [INFO] [utils.py:829:see_memory_usage] before init\n", - "(PredictionWorker pid=10038) [2023-04-22 11:14:30,762] [INFO] [utils.py:830:see_memory_usage] MA 0.0 GB Max_MA 0.0 GB CA 0.0 GB Max_CA 0 GB \n", - "(PredictionWorker pid=10038) [2023-04-22 11:14:30,762] [INFO] [utils.py:838:see_memory_usage] CPU Virtual Memory: used = 11.63 GB, percent = 2.4%\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "(PredictionWorker pid=10040) --------------------------------------------------------------------------\n", - "(PredictionWorker pid=10040) Aim collects anonymous usage analytics. \n", - "(PredictionWorker pid=10040) Read how to opt-out here: \n", - "(PredictionWorker pid=10040) https://aimstack.readthedocs.io/en/latest/community/telemetry.html \n", - "(PredictionWorker pid=10040) --------------------------------------------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "(PredictionWorker pid=10045) [2023-04-22 11:14:33,061] [INFO] [logging.py:93:log_dist] [Rank -1] DeepSpeed info: version=0.8.3, git-hash=unknown, git-branch=unknown\n", - "(PredictionWorker pid=10045) [2023-04-22 11:14:33,062] [WARNING] [config_utils.py:75:_process_deprecated_field] Config parameter replace_method is deprecated. This parameter is no longer needed, please remove from your call to DeepSpeed-inference\n", - "(PredictionWorker pid=10045) [2023-04-22 11:14:33,062] [WARNING] [config_utils.py:75:_process_deprecated_field] Config parameter mp_size is deprecated use tensor_parallel.tp_size instead\n", - "(PredictionWorker pid=10045) [2023-04-22 11:14:33,062] [INFO] [logging.py:93:log_dist] [Rank -1] quantize_bits = 8 mlp_extra_grouping = False, quantize_groups = 1\n", - "(PredictionWorker pid=10038) [2023-04-22 11:14:33,074] [INFO] [utils.py:829:see_memory_usage] after init\n", - "(PredictionWorker pid=10038) [2023-04-22 11:14:33,075] [INFO] [utils.py:830:see_memory_usage] MA 0.0 GB Max_MA 0.0 GB CA 0.0 GB Max_CA 0 GB \n", - "(PredictionWorker pid=10038) [2023-04-22 11:14:33,075] [INFO] [utils.py:838:see_memory_usage] CPU Virtual Memory: used = 12.25 GB, percent = 2.6%\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "(PredictionWorker pid=10040) Using /home/ray/.cache/torch_extensions/py39_cu116 as PyTorch extensions root...\n", - "(PredictionWorker pid=10038) Creating extension directory /home/ray/.cache/torch_extensions/py39_cu116/transformer_inference...\n", - "(PredictionWorker pid=10038) Detected CUDA files, patching ldflags\n", - "(PredictionWorker pid=10038) Emitting ninja build file /home/ray/.cache/torch_extensions/py39_cu116/transformer_inference/build.ninja...\n", - "(PredictionWorker pid=10038) Building extension module transformer_inference...\n", - "(PredictionWorker pid=10038) Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "(PredictionWorker pid=10038) [1/9] /usr/local/cuda/bin/nvcc -DTORCH_EXTENSION_NAME=transformer_inference -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\\\"_gcc\\\" -DPYBIND11_STDLIB=\\\"_libstdcpp\\\" -DPYBIND11_BUILD_ABI=\\\"_cxxabi1011\\\" -I/home/ray/anaconda3/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/includes -I/home/ray/anaconda3/lib/python3.9/site-packages/deepspeed/ops/csrc/includes -isystem /home/ray/anaconda3/lib/python3.9/site-packages/torch/include -isystem /home/ray/anaconda3/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -isystem /home/ray/anaconda3/lib/python3.9/site-packages/torch/include/TH -isystem /home/ray/anaconda3/lib/python3.9/site-packages/torch/include/THC -isystem /usr/local/cuda/include -isystem /home/ray/anaconda3/include/python3.9 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_70,code=compute_70 -gencode=arch=compute_70,code=sm_70 --compiler-options '-fPIC' -O3 --use_fast_math -std=c++14 -U__CUDA_NO_HALF_OPERATORS__ -U__CUDA_NO_HALF_CONVERSIONS__ -U__CUDA_NO_HALF2_OPERATORS__ -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_70,code=compute_70 -c /home/ray/anaconda3/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/dequantize.cu -o dequantize.cuda.o \n", - "(PredictionWorker pid=10038) [2/9] /usr/local/cuda/bin/nvcc -DTORCH_EXTENSION_NAME=transformer_inference -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\\\"_gcc\\\" -DPYBIND11_STDLIB=\\\"_libstdcpp\\\" -DPYBIND11_BUILD_ABI=\\\"_cxxabi1011\\\" -I/home/ray/anaconda3/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/includes -I/home/ray/anaconda3/lib/python3.9/site-packages/deepspeed/ops/csrc/includes -isystem /home/ray/anaconda3/lib/python3.9/site-packages/torch/include -isystem /home/ray/anaconda3/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -isystem /home/ray/anaconda3/lib/python3.9/site-packages/torch/include/TH -isystem /home/ray/anaconda3/lib/python3.9/site-packages/torch/include/THC -isystem /usr/local/cuda/include -isystem /home/ray/anaconda3/include/python3.9 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_70,code=compute_70 -gencode=arch=compute_70,code=sm_70 --compiler-options '-fPIC' -O3 --use_fast_math -std=c++14 -U__CUDA_NO_HALF_OPERATORS__ -U__CUDA_NO_HALF_CONVERSIONS__ -U__CUDA_NO_HALF2_OPERATORS__ -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_70,code=compute_70 -c /home/ray/anaconda3/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/relu.cu -o relu.cuda.o \n", - "(PredictionWorker pid=10038) [3/9] /usr/local/cuda/bin/nvcc -DTORCH_EXTENSION_NAME=transformer_inference -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\\\"_gcc\\\" -DPYBIND11_STDLIB=\\\"_libstdcpp\\\" -DPYBIND11_BUILD_ABI=\\\"_cxxabi1011\\\" -I/home/ray/anaconda3/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/includes -I/home/ray/anaconda3/lib/python3.9/site-packages/deepspeed/ops/csrc/includes -isystem /home/ray/anaconda3/lib/python3.9/site-packages/torch/include -isystem /home/ray/anaconda3/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -isystem /home/ray/anaconda3/lib/python3.9/site-packages/torch/include/TH -isystem /home/ray/anaconda3/lib/python3.9/site-packages/torch/include/THC -isystem /usr/local/cuda/include -isystem /home/ray/anaconda3/include/python3.9 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_70,code=compute_70 -gencode=arch=compute_70,code=sm_70 --compiler-options '-fPIC' -O3 --use_fast_math -std=c++14 -U__CUDA_NO_HALF_OPERATORS__ -U__CUDA_NO_HALF_CONVERSIONS__ -U__CUDA_NO_HALF2_OPERATORS__ -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_70,code=compute_70 -c /home/ray/anaconda3/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/apply_rotary_pos_emb.cu -o apply_rotary_pos_emb.cuda.o \n", - "(PredictionWorker pid=10038) [4/9] /usr/local/cuda/bin/nvcc -DTORCH_EXTENSION_NAME=transformer_inference -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\\\"_gcc\\\" -DPYBIND11_STDLIB=\\\"_libstdcpp\\\" -DPYBIND11_BUILD_ABI=\\\"_cxxabi1011\\\" -I/home/ray/anaconda3/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/includes -I/home/ray/anaconda3/lib/python3.9/site-packages/deepspeed/ops/csrc/includes -isystem /home/ray/anaconda3/lib/python3.9/site-packages/torch/include -isystem /home/ray/anaconda3/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -isystem /home/ray/anaconda3/lib/python3.9/site-packages/torch/include/TH -isystem /home/ray/anaconda3/lib/python3.9/site-packages/torch/include/THC -isystem /usr/local/cuda/include -isystem /home/ray/anaconda3/include/python3.9 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_70,code=compute_70 -gencode=arch=compute_70,code=sm_70 --compiler-options '-fPIC' -O3 --use_fast_math -std=c++14 -U__CUDA_NO_HALF_OPERATORS__ -U__CUDA_NO_HALF_CONVERSIONS__ -U__CUDA_NO_HALF2_OPERATORS__ -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_70,code=compute_70 -c /home/ray/anaconda3/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/transform.cu -o transform.cuda.o \n", - "(PredictionWorker pid=10038) /home/ray/anaconda3/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/transform.cu(56): warning #177-D: variable \"lane\" was declared but never referenced\n", - "(PredictionWorker pid=10038) \n", - "(PredictionWorker pid=10038) /home/ray/anaconda3/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/transform.cu(93): warning #177-D: variable \"half_dim\" was declared but never referenced\n", - "(PredictionWorker pid=10038) \n", - "(PredictionWorker pid=10038) /home/ray/anaconda3/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/transform.cu(110): warning #177-D: variable \"vals_half\" was declared but never referenced\n", - "(PredictionWorker pid=10038) \n", - "(PredictionWorker pid=10038) /home/ray/anaconda3/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/transform.cu(111): warning #177-D: variable \"output_half\" was declared but never referenced\n", - "(PredictionWorker pid=10038) \n", - "(PredictionWorker pid=10038) /home/ray/anaconda3/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/transform.cu(128): warning #177-D: variable \"lane\" was declared but never referenced\n", - "(PredictionWorker pid=10038) \n", - "(PredictionWorker pid=10038) [5/9] /usr/local/cuda/bin/nvcc -DTORCH_EXTENSION_NAME=transformer_inference -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\\\"_gcc\\\" -DPYBIND11_STDLIB=\\\"_libstdcpp\\\" -DPYBIND11_BUILD_ABI=\\\"_cxxabi1011\\\" -I/home/ray/anaconda3/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/includes -I/home/ray/anaconda3/lib/python3.9/site-packages/deepspeed/ops/csrc/includes -isystem /home/ray/anaconda3/lib/python3.9/site-packages/torch/include -isystem /home/ray/anaconda3/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -isystem /home/ray/anaconda3/lib/python3.9/site-packages/torch/include/TH -isystem /home/ray/anaconda3/lib/python3.9/site-packages/torch/include/THC -isystem /usr/local/cuda/include -isystem /home/ray/anaconda3/include/python3.9 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_70,code=compute_70 -gencode=arch=compute_70,code=sm_70 --compiler-options '-fPIC' -O3 --use_fast_math -std=c++14 -U__CUDA_NO_HALF_OPERATORS__ -U__CUDA_NO_HALF_CONVERSIONS__ -U__CUDA_NO_HALF2_OPERATORS__ -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_70,code=compute_70 -c /home/ray/anaconda3/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/softmax.cu -o softmax.cuda.o \n", - "(PredictionWorker pid=10038) /home/ray/anaconda3/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/softmax.cu(272): warning #177-D: variable \"alibi_offset\" was declared but never referenced\n", - "(PredictionWorker pid=10038) \n", - "(PredictionWorker pid=10038) /home/ray/anaconda3/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/softmax.cu(427): warning #177-D: variable \"warp_num\" was declared but never referenced\n", - "(PredictionWorker pid=10038) \n", - "(PredictionWorker pid=10038) [6/9] /usr/local/cuda/bin/nvcc -DTORCH_EXTENSION_NAME=transformer_inference -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\\\"_gcc\\\" -DPYBIND11_STDLIB=\\\"_libstdcpp\\\" -DPYBIND11_BUILD_ABI=\\\"_cxxabi1011\\\" -I/home/ray/anaconda3/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/includes -I/home/ray/anaconda3/lib/python3.9/site-packages/deepspeed/ops/csrc/includes -isystem /home/ray/anaconda3/lib/python3.9/site-packages/torch/include -isystem /home/ray/anaconda3/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -isystem /home/ray/anaconda3/lib/python3.9/site-packages/torch/include/TH -isystem /home/ray/anaconda3/lib/python3.9/site-packages/torch/include/THC -isystem /usr/local/cuda/include -isystem /home/ray/anaconda3/include/python3.9 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_70,code=compute_70 -gencode=arch=compute_70,code=sm_70 --compiler-options '-fPIC' -O3 --use_fast_math -std=c++14 -U__CUDA_NO_HALF_OPERATORS__ -U__CUDA_NO_HALF_CONVERSIONS__ -U__CUDA_NO_HALF2_OPERATORS__ -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_70,code=compute_70 -c /home/ray/anaconda3/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/gelu.cu -o gelu.cuda.o \n", - "(PredictionWorker pid=10038) [2023-04-22 11:14:33,250] [INFO] [logging.py:93:log_dist] [Rank -1] DeepSpeed info: version=0.8.3, git-hash=unknown, git-branch=unknown [repeated 7x across cluster] (Ray deduplicates logs by default. Set RAY_DEDUP_LOGS=0 to disable log deduplication, or see https://docs.ray.io/en/master/ray-observability/ray-logging.html#log-deduplication for more options.)\n", - "(PredictionWorker pid=10038) [2023-04-22 11:14:33,251] [WARNING] [config_utils.py:75:_process_deprecated_field] Config parameter replace_method is deprecated. This parameter is no longer needed, please remove from your call to DeepSpeed-inference [repeated 7x across cluster]\n", - "(PredictionWorker pid=10038) [2023-04-22 11:14:33,251] [WARNING] [config_utils.py:75:_process_deprecated_field] Config parameter mp_size is deprecated use tensor_parallel.tp_size instead [repeated 7x across cluster]\n", - "(PredictionWorker pid=10038) [2023-04-22 11:14:33,251] [INFO] [logging.py:93:log_dist] [Rank -1] quantize_bits = 8 mlp_extra_grouping = False, quantize_groups = 1 [repeated 7x across cluster]\n", - "(PredictionWorker pid=10038) [7/9] /usr/local/cuda/bin/nvcc -DTORCH_EXTENSION_NAME=transformer_inference -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\\\"_gcc\\\" -DPYBIND11_STDLIB=\\\"_libstdcpp\\\" -DPYBIND11_BUILD_ABI=\\\"_cxxabi1011\\\" -I/home/ray/anaconda3/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/includes -I/home/ray/anaconda3/lib/python3.9/site-packages/deepspeed/ops/csrc/includes -isystem /home/ray/anaconda3/lib/python3.9/site-packages/torch/include -isystem /home/ray/anaconda3/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -isystem /home/ray/anaconda3/lib/python3.9/site-packages/torch/include/TH -isystem /home/ray/anaconda3/lib/python3.9/site-packages/torch/include/THC -isystem /usr/local/cuda/include -isystem /home/ray/anaconda3/include/python3.9 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_70,code=compute_70 -gencode=arch=compute_70,code=sm_70 --compiler-options '-fPIC' -O3 --use_fast_math -std=c++14 -U__CUDA_NO_HALF_OPERATORS__ -U__CUDA_NO_HALF_CONVERSIONS__ -U__CUDA_NO_HALF2_OPERATORS__ -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_70,code=compute_70 -c /home/ray/anaconda3/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu -o layer_norm.cuda.o \n", - "(PredictionWorker pid=10038) [8/9] c++ -MMD -MF pt_binding.o.d -DTORCH_EXTENSION_NAME=transformer_inference -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\\\"_gcc\\\" -DPYBIND11_STDLIB=\\\"_libstdcpp\\\" -DPYBIND11_BUILD_ABI=\\\"_cxxabi1011\\\" -I/home/ray/anaconda3/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/includes -I/home/ray/anaconda3/lib/python3.9/site-packages/deepspeed/ops/csrc/includes -isystem /home/ray/anaconda3/lib/python3.9/site-packages/torch/include -isystem /home/ray/anaconda3/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -isystem /home/ray/anaconda3/lib/python3.9/site-packages/torch/include/TH -isystem /home/ray/anaconda3/lib/python3.9/site-packages/torch/include/THC -isystem /usr/local/cuda/include -isystem /home/ray/anaconda3/include/python3.9 -D_GLIBCXX_USE_CXX11_ABI=0 -fPIC -std=c++14 -O3 -std=c++14 -g -Wno-reorder -c /home/ray/anaconda3/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp -o pt_binding.o \n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "(PredictionWorker pid=10038) Loading extension module transformer_inference...\n", - "(PredictionWorker pid=10041) -------------------------------------------------------------------------- [repeated 14x across cluster]\n", - "(PredictionWorker pid=10041) Aim collects anonymous usage analytics. [repeated 7x across cluster]\n", - "(PredictionWorker pid=10041) Read how to opt-out here: [repeated 7x across cluster]\n", - "(PredictionWorker pid=10041) https://aimstack.readthedocs.io/en/latest/community/telemetry.html [repeated 7x across cluster]\n", - "(PredictionWorker pid=10041) Using /home/ray/.cache/torch_extensions/py39_cu116 as PyTorch extensions root... [repeated 7x across cluster]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "(PredictionWorker pid=10038) [9/9] c++ pt_binding.o gelu.cuda.o relu.cuda.o layer_norm.cuda.o softmax.cuda.o dequantize.cuda.o apply_rotary_pos_emb.cuda.o transform.cuda.o -shared -lcurand -L/home/ray/anaconda3/lib/python3.9/site-packages/torch/lib -lc10 -lc10_cuda -ltorch_cpu -ltorch_cuda_cu -ltorch_cuda_cpp -ltorch -ltorch_python -L/usr/local/cuda/lib64 -lcudart -o transformer_inference.so\n", - "(PredictionWorker pid=10038) Time to load transformer_inference op: 46.834928035736084 seconds\n", - "(PredictionWorker pid=10038) [2023-04-22 11:15:21,799] [INFO] [logging.py:93:log_dist] [Rank 0] DeepSpeed-Inference config: {'layer_id': 0, 'hidden_size': 7168, 'intermediate_size': 28672, 'heads': 56, 'num_hidden_layers': -1, 'fp16': True, 'pre_layer_norm': True, 'local_rank': -1, 'stochastic_mode': False, 'epsilon': 1e-12, 'mp_size': 8, 'q_int8': False, 'scale_attention': True, 'triangular_masking': True, 'local_attention': False, 'window_size': 1, 'rotary_dim': -1, 'rotate_half': False, 'rotate_every_two': True, 'return_tuple': True, 'mlp_after_attn': True, 'mlp_act_func_type': , 'specialized_mode': False, 'training_mp_size': 1, 'bigscience_bloom': False, 'max_out_tokens': 1024, 'scale_attn_by_inverse_layer_idx': False, 'enable_qkv_quantization': False, 'use_mup': False, 'return_single_tuple': False}\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "(PredictionWorker pid=10040) No modifications detected for re-loaded extension module transformer_inference, skipping build step...\n", - "Loading 7 checkpoint shards: 0%| | 0/7 [00:00 0.5).astype(int), batch_format=\"pandas\")\n", - " )\n", - " print(f\"PREDICTED LABELS\")\n", - " predicted_labels.show()" - ] - }, - { + "attachments": {}, "cell_type": "markdown", "id": "7d073994", "metadata": {}, @@ -339,70 +311,14 @@ ] }, { + "attachments": {}, "cell_type": "markdown", - "id": "0daba603", + "id": "e11cf27b", "metadata": {}, "source": [ - "And perform inference on the obtained model:" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "24b16ede", - "metadata": { - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2022-06-22 17:27:59,658\tWARNING read_api.py:260 -- The number of blocks in this dataset (1) limits its parallelism to 1 concurrent tasks. This is much less than the number of available CPU slots in the cluster. Use `.repartition(n)` to increase the number of dataset blocks.\n", - "Map_Batches: 100%|██████████| 1/1 [00:00<00:00, 64.73it/s]\n", - "Map Progress (1 actors 1 pending): 100%|██████████| 1/1 [00:01<00:00, 1.60s/it]\n", - "Map_Batches: 100%|██████████| 1/1 [00:00<00:00, 71.41it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "PREDICTED LABELS\n", - "{'predictions': 1}\n", - "{'predictions': 1}\n", - "{'predictions': 0}\n", - "{'predictions': 1}\n", - "{'predictions': 1}\n", - "{'predictions': 1}\n", - "{'predictions': 1}\n", - "{'predictions': 1}\n", - "{'predictions': 0}\n", - "{'predictions': 1}\n", - "{'predictions': 0}\n", - "{'predictions': 1}\n", - "{'predictions': 1}\n", - "{'predictions': 1}\n", - "{'predictions': 1}\n", - "{'predictions': 0}\n", - "{'predictions': 1}\n", - "{'predictions': 1}\n", - "{'predictions': 1}\n", - "{'predictions': 0}\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n" - ] - } - ], - "source": [ - "predict_sklearn(result, use_gpu=False)" + "## Next steps\n", + "\n", + "- {ref}`End-to-end: Offline Batch Inference `" ] } ], diff --git a/doc/source/ray-air/examples/tf_tabular_batch_prediction.py b/doc/source/ray-air/examples/tf_tabular_batch_prediction.py deleted file mode 100644 index c4fae8c9a688..000000000000 --- a/doc/source/ray-air/examples/tf_tabular_batch_prediction.py +++ /dev/null @@ -1,49 +0,0 @@ -import numpy as np - -import ray -from ray.data.preprocessors import Concatenator -from ray.train.tensorflow import TensorflowCheckpoint, TensorflowPredictor -from ray.train.batch_predictor import BatchPredictor - - -def create_model(input_features): - from tensorflow import keras # this is needed for tf<2.9 - from tensorflow.keras import layers - - return keras.Sequential( - [ - keras.Input(shape=(input_features,)), - layers.Dense(16, activation="relu"), - layers.Dense(16, activation="relu"), - layers.Dense(1, activation="sigmoid"), - ] - ) - - -dataset = ray.data.read_csv("s3://anonymous@air-example-data/breast_cancer.csv") - -# All columns are features except the target column. -num_features = len(dataset.schema().names) - 1 - -# Specify a preprocessor to concatenate all feature columns. -prep = Concatenator( - output_column_name="concat_features", exclude=["target"], dtype=np.float32 -) - -checkpoint = TensorflowCheckpoint.from_model( - model=create_model(num_features), preprocessor=prep -) -# You can also fetch a checkpoint from a Trainer -# checkpoint = trainer.fit().checkpoint - -batch_predictor = BatchPredictor.from_checkpoint( - checkpoint, TensorflowPredictor, model_definition=lambda: create_model(num_features) -) - -predicted_probabilities = batch_predictor.predict( - dataset, feature_columns=["concat_features"] -) -# Call show on the output probabilities to trigger execution. -predicted_probabilities.show() -# {'predictions': array([1.], dtype=float32)} -# {'predictions': array([0.], dtype=float32)} diff --git a/doc/source/ray-air/examples/tf_tabular_starter.py b/doc/source/ray-air/examples/tf_tabular_starter.py index 56e66f2f60b3..3b862092d5ff 100644 --- a/doc/source/ray-air/examples/tf_tabular_starter.py +++ b/doc/source/ray-air/examples/tf_tabular_starter.py @@ -131,24 +131,3 @@ def train_loop_per_worker(config): print("Best Result:", best_result) # Best Result: Result(metrics={'loss': 4.997025489807129, ...) # __air_tune_generic_end__ - -# __air_tf_batchpred_start__ -from ray.train.batch_predictor import BatchPredictor -from ray.train.tensorflow import TensorflowPredictor - -# You can also create a checkpoint from a trained model using `TensorflowCheckpoint`. -checkpoint = best_result.checkpoint - -batch_predictor = BatchPredictor.from_checkpoint( - checkpoint, - TensorflowPredictor, - model_definition=lambda: create_keras_model(num_features), -) - -predicted_probabilities = batch_predictor.predict(test_dataset) -predicted_probabilities.show() -# {'predictions': 0.033036969602108} -# {'predictions': 0.05944341793656349} -# {'predictions': 0.1657751202583313} -# ... -# __air_tf_batchpred_end__ diff --git a/doc/source/ray-air/examples/torch_detection.ipynb b/doc/source/ray-air/examples/torch_detection.ipynb index 1daaa8631657..5095caa7b77b 100644 --- a/doc/source/ray-air/examples/torch_detection.ipynb +++ b/doc/source/ray-air/examples/torch_detection.ipynb @@ -837,155 +837,12 @@ }, { "cell_type": "markdown", - "id": "224a1139", + "id": "838101c2", "metadata": {}, "source": [ - "## Evaluate the model on test data\n", + "## Next steps\n", "\n", - "Now that you've fine-tuned the model, you'll evaluate it on the test data.\n", - "\n", - "### Generate predictions on the test data" - ] - }, - { - "cell_type": "markdown", - "id": "1fc9bac2", - "metadata": {}, - "source": [ - "`Predictors` let you perform scalable [batch prediction](batch-prediction) and\n", - "[online inference](air-serving-guide). To evaluate the model, you'll use\n", - "`BatchPredictor` to perform inference in a distributed fashion.\n", - "\n", - "Create a `BatchPredictor` and pass `TorchDetectionPredictor` to the constructor. Then,\n", - "call `BatchPredictor.predict` to detect objects in the test data." - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "cc3cc662", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-03-01 13:08:48,113\tINFO batch_predictor.py:214 -- `num_gpus_per_worker` is set for `BatchPreditor`.Automatically enabling GPU prediction for this predictor. To disable set `use_gpu` to `False` in `BatchPredictor.predict`.\n", - "2023-03-01 13:08:48,945\tINFO bulk_executor.py:41 -- Executing DAG InputDataBuffer[Input] -> TaskPoolMapOperator[TorchVisionPreprocessor] -> ActorPoolMapOperator[MapBatches(ScoringWrapper)]\n", - "TorchVisionPreprocessor: 100%|██████████| 26/26 [00:17<00:00, 1.49it/s]\n", - "MapBatches(ScoringWrapper), 0 actors [26 locality hits, 0 misses]: 100%|██████████| 26/26 [00:32<00:00, 1.25s/it] \n" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "468b32006b5f440dae152b288d84d5d3", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "VBox(children=(HTML(value='

Dataset

'), Tab(children=(HTML(value='
`" ] } ], diff --git a/doc/source/ray-air/examples/torch_image_batch_pretrained.py b/doc/source/ray-air/examples/torch_image_batch_pretrained.py deleted file mode 100644 index dc0f4ed874ae..000000000000 --- a/doc/source/ray-air/examples/torch_image_batch_pretrained.py +++ /dev/null @@ -1,30 +0,0 @@ -from torchvision import transforms -from torchvision.models import resnet18 - -import ray -from ray.train.torch import TorchCheckpoint, TorchPredictor -from ray.train.batch_predictor import BatchPredictor -from ray.data.preprocessors import TorchVisionPreprocessor - - -data_url = "s3://anonymous@air-example-data-2/1G-image-data-synthetic-raw" -print(f"Running GPU batch prediction with 1GB data from {data_url}") -dataset = ray.data.read_images(data_url, size=(256, 256)).limit(10) - -model = resnet18(pretrained=True) - -transform = transforms.Compose( - [ - transforms.ToTensor(), - transforms.CenterCrop(224), - transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), - ] -) -preprocessor = TorchVisionPreprocessor(columns=["image"], transform=transform) - -ckpt = TorchCheckpoint.from_model(model=model, preprocessor=preprocessor) - -predictor = BatchPredictor.from_checkpoint(ckpt, TorchPredictor) -predictions = predictor.predict(dataset, batch_size=80, num_gpus_per_worker=1) -# Call show on the output probabilities to trigger execution -predictions.show() diff --git a/doc/source/ray-air/examples/torch_incremental_learning.ipynb b/doc/source/ray-air/examples/torch_incremental_learning.ipynb index 4bf5fd448fd2..b0cf82f99b1e 100644 --- a/doc/source/ray-air/examples/torch_incremental_learning.ipynb +++ b/doc/source/ray-air/examples/torch_incremental_learning.ipynb @@ -1,6 +1,7 @@ { "cells": [ { + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "TsniIjjg2Pym" @@ -10,6 +11,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "1VsUrzVm1W-h" @@ -40,6 +42,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "Q3oGiuqYfj9_" @@ -50,11 +53,11 @@ "2. Create an `Iterator[ray.data.Dataset]` abstraction to represent a stream of data to train on for incremental training.\n", "3. Implement a custom Ray AIR preprocessor to preprocess the dataset.\n", "4. Incrementally train a model using data parallel training.\n", - "5. Use our trained model to perform batch prediction on test data.\n", - "6. Incrementally deploying our trained model with Ray Serve and performing online prediction queries." + "5. Incrementally deploying our trained model with Ray Serve and performing online prediction queries." ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "z52Y8O4q1bIk" @@ -83,6 +86,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "RpD4STX3g1dq" @@ -171,6 +175,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "AedcxD_FClQL" @@ -221,6 +226,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "L2N1U22VC_N9" @@ -237,6 +243,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "3SVSrkqrDJuc" @@ -299,6 +306,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "vqrfgfl9YnVe" @@ -394,6 +402,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "HDGHgtb699kd" @@ -492,6 +501,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "9HUciluylZbX" @@ -527,65 +537,13 @@ ] }, { - "cell_type": "markdown", - "metadata": { - "id": "Uto3v90Hagni" - }, - "source": [ - "## 4c: Define logic for Batch/Offline Prediction.\n", - "\n", - "After training on each task, we want to use our trained model to do batch (i.e. offline) inference on a test dataset. \n", - "\n", - "To do this, we leverage the built-in `ray.air.BatchPredictor`. We define a `batch_predict` function that will take in a Checkpoint and a Test Dataset and outputs the accuracy our model achieves on the test dataset." - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": { - "id": "DM2lFHzFa6uI" - }, - "outputs": [], - "source": [ - "from ray.train.batch_predictor import BatchPredictor\n", - "from ray.train.torch import TorchPredictor\n", - "\n", - "def batch_predict(checkpoint: ray.air.Checkpoint, test_dataset: ray.data.Dataset) -> float:\n", - " \"\"\"Perform batch prediction on the provided test dataset, and return accuracy results.\"\"\"\n", - "\n", - " batch_predictor = BatchPredictor.from_checkpoint(checkpoint, predictor_cls=TorchPredictor, model=SimpleMLP(num_classes=10))\n", - " model_output = batch_predictor.predict(\n", - " data=test_dataset, feature_columns=[\"image\"], keep_columns=[\"label\"]\n", - " )\n", - " \n", - " # Postprocess model outputs.\n", - " # Convert logits outputted from model into actual class predictions.\n", - " def convert_logits_to_classes(df):\n", - " best_class = df[\"predictions\"].map(lambda x: np.array(x).argmax())\n", - " df[\"predictions\"] = best_class\n", - " return df\n", - " \n", - " prediction_results = model_output.map_batches(convert_logits_to_classes, batch_format=\"pandas\")\n", - " \n", - " # Then, for each prediction output, see if it matches with the ground truth\n", - " # label.\n", - " def calculate_prediction_scores(df):\n", - " return pd.DataFrame({\"correct\": df[\"predictions\"] == df[\"label\"]})\n", - "\n", - " correct_dataset = prediction_results.map_batches(\n", - " calculate_prediction_scores, batch_format=\"pandas\"\n", - " )\n", - "\n", - " return correct_dataset.sum(on=\"correct\") / correct_dataset.count()" - ] - }, - { + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "GWiTtsmVbIZP" }, "source": [ - "## 4d: Define logic for Deploying and Querying our model\n", + "## 4c: Define logic for Deploying and Querying our model\n", "\n", "In addition to batch inference, we also want to deploy our model so that we can submit live queries to it for online inference. We use Ray Serve's `PredictorDeployment` utility to deploy our trained model. \n", "\n", @@ -636,6 +594,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "-NQDj0rFVUX3" @@ -648,15 +607,14 @@ "For each dataset in our stream, we do the following:\n", "1. Train on the dataset in Data Parallel fashion. We create a `TorchTrainer`, specify the config for the training loop we defined above, the dataset to train on, and how much we want to scale. `TorchTrainer` also accepts a `checkpoint` arg to continue training from a previously saved checkpoint.\n", "2. Get the saved checkpoint from the training run.\n", - "3. Test our trained model on a test set containing test data from all the tasks trained on so far.\n", "3. After training on each task, we deploy our model so we can query it for predictions.\n", "\n", - "In this example, the training and test data for each task is well-defined beforehand by the benchmark. For real-world scenarios, this probably will not be the case. It is very likely that the prediction requests after training on one task will become the training data for the next task. \n" + "In this example, the training data for each task is well-defined beforehand by the benchmark. For real-world scenarios, this probably will not be the case. It is very likely that the prediction requests after training on one task will become the training data for the next task. \n" ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -1390,22 +1348,6 @@ " result = trainer.fit()\n", " latest_checkpoint = result.checkpoint\n", "\n", - " # **************Batch Prediction**************************\n", - "\n", - " # We can do batch prediction on the test data for the tasks seen so far.\n", - " # TODO: Fix type signature in Ray Data\n", - " # TODO: Fix dataset.union when used with empty list.\n", - " if len(all_test_datasets_seen_so_far) > 0:\n", - " full_test_dataset = test_dataset.union(*all_test_datasets_seen_so_far)\n", - " else:\n", - " full_test_dataset = test_dataset\n", - "\n", - " all_test_datasets_seen_so_far.append(test_dataset)\n", - "\n", - " accuracy_for_this_task = batch_predict(latest_checkpoint, full_test_dataset)\n", - " print(f\"Accuracy for task {task_idx}: {accuracy_for_this_task}\")\n", - " accuracy_for_all_tasks.append(accuracy_for_this_task)\n", - "\n", " # *************Model Deployment & Online Inference***************************\n", " \n", " # We can also deploy our model to do online inference with Ray Serve.\n", @@ -1423,6 +1365,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "ORWpRkPjcPbD" @@ -1462,328 +1405,7 @@ ] }, { - "cell_type": "markdown", - "metadata": { - "id": "xLLAvsTk8LoV" - }, - "source": [ - "# [Optional] Step 6: Compare against full training.\n", - "\n", - "We have now incrementally trained our simple multi-layer perceptron. Let's compare the incrementally trained model via fine tuning against a model that is trained on all the tasks up front.\n", - "\n", - "Since we are using a naive fine-tuning strategy, we should expect that our incrementally trained model will perform worse than the one that is fully trained! However, there's various other strategies that have been developed and are actively being researched to improve accuracy for incremental training. And overall, incremental/continual learning allows you to train in many real world settings where the entire dataset is not available up front, but new data is arriving at a relatively high rate." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "RNHsEVBHc0p2" - }, - "source": [ - "Let's first combine all of our datasets for each task into a single, unified dataset" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "pU2fVH068lfF", - "outputId": "fd6a3b56-dda1-4fa6-cebd-d0ee8784e698" - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Map Progress (1 actors 1 pending): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 1.37it/s]\n", - "Map Progress (1 actors 1 pending): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 1.37it/s]\n", - "Map Progress (1 actors 1 pending): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 1.40it/s]\n", - "Shuffle Map: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 40.34it/s]\n", - "Shuffle Reduce: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 28.99it/s]\n" - ] - } - ], - "source": [ - "train_stream = permuted_mnist.generate_train_stream()\n", - "\n", - "# Collect all datasets in the stream into a single dataset.\n", - "all_training_datasets = []\n", - "for train_dataset in train_stream:\n", - " all_training_datasets.append(train_dataset)\n", - "combined_training_dataset = all_training_datasets[0].union(*all_training_datasets[1:])\n", - "\n", - "\n", - "combined_training_dataset = combined_training_dataset.random_shuffle()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "tJ6Oqdgvc5dn" - }, - "source": [ - "Then, we train a new model on the unified dataset using the same configurations as before." - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 1000 - }, - "id": "PmH9c0-z9KME", - "outputId": "653b4dfc-ed47-4307-fa84-e4c4ea3ec354" - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "
\n", - "
\n", - "

Tune Status

\n", - " \n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
Current time:2022-09-23 16:37:13
Running for: 00:00:25.97
Memory: 19.4/62.7 GiB
\n", - "
\n", - "
\n", - "
\n", - "

System Info

\n", - " Using FIFO scheduling algorithm.
Resources requested: 0/24 CPUs, 0/0 GPUs, 0.0/32.53 GiB heap, 0.0/16.26 GiB objects\n", - "
\n", - " \n", - "
\n", - "
\n", - "
\n", - "

Trial Status

\n", - " \n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
Trial name status loc iter total time (s) loss _timestamp _time_this_iter_s
TorchTrainer_971af_00000TERMINATED10.109.175.190:860035 4 22.1282 0 1663976231 0.0924587
\n", - "
\n", - "
\n", - "\n" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[2m\u001b[36m(RayTrainWorker pid=860154)\u001b[0m 2022-09-23 16:36:55,188\tINFO config.py:71 -- Setting up process group for: env:// [rank=0, world_size=1]\n", - "\u001b[2m\u001b[36m(RayTrainWorker pid=860154)\u001b[0m 2022-09-23 16:36:55,399\tINFO train_loop_utils.py:354 -- Moving model to device: cuda:0\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[2m\u001b[36m(RayTrainWorker pid=860154)\u001b[0m loss: 2.301066, epoch: 0, iteration: 0\n", - "\u001b[2m\u001b[36m(RayTrainWorker pid=860154)\u001b[0m loss: 1.869080, epoch: 0, iteration: 500\n", - "\u001b[2m\u001b[36m(RayTrainWorker pid=860154)\u001b[0m loss: 1.489264, epoch: 0, iteration: 1000\n", - "\u001b[2m\u001b[36m(RayTrainWorker pid=860154)\u001b[0m loss: 1.646756, epoch: 0, iteration: 1500\n", - "\u001b[2m\u001b[36m(RayTrainWorker pid=860154)\u001b[0m loss: 1.582330, epoch: 0, iteration: 2000\n", - "\u001b[2m\u001b[36m(RayTrainWorker pid=860154)\u001b[0m loss: 1.246018, epoch: 0, iteration: 2500\n", - "\u001b[2m\u001b[36m(RayTrainWorker pid=860154)\u001b[0m loss: 1.035204, epoch: 0, iteration: 3000\n", - "\u001b[2m\u001b[36m(RayTrainWorker pid=860154)\u001b[0m loss: 0.872962, epoch: 0, iteration: 3500\n", - "\u001b[2m\u001b[36m(RayTrainWorker pid=860154)\u001b[0m loss: 1.138829, epoch: 0, iteration: 4000\n", - "\u001b[2m\u001b[36m(RayTrainWorker pid=860154)\u001b[0m loss: 0.753354, epoch: 0, iteration: 4500\n", - "\u001b[2m\u001b[36m(RayTrainWorker pid=860154)\u001b[0m loss: 0.991935, epoch: 0, iteration: 5000\n", - "\u001b[2m\u001b[36m(RayTrainWorker pid=860154)\u001b[0m loss: 0.928292, epoch: 0, iteration: 5500\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "

Trial Progress

\n", - " \n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
Trial name _time_this_iter_s _timestamp _training_iterationdate done episodes_total experiment_id experiment_taghostname iterations_since_restore lossnode_ip pidshould_checkpoint time_since_restore time_this_iter_s time_total_s timestamp timesteps_since_restoretimesteps_total training_iterationtrial_id warmup_time
TorchTrainer_971af_00000 0.0924587 1663976231 42022-09-23_16-37-11True 26d685b2612a4752b7d062d1ebfb89f0 0corvus 4 010.109.175.190860035True 22.1282 0.0941384 22.1282 1663976231 0 4971af_00000 0.0034101
\n", - "
\n", - "\n" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2022-09-23 16:37:13,525\tINFO tune.py:762 -- Total run time: 26.08 seconds (25.96 seconds for the tuning loop).\n" - ] - } - ], - "source": [ - "# Now we do training with the same configurations as before\n", - "trainer = TorchTrainer(\n", - " train_loop_per_worker=train_loop_per_worker,\n", - " train_loop_config={\n", - " \"num_epochs\": num_epochs,\n", - " \"learning_rate\": learning_rate,\n", - " \"momentum\": momentum,\n", - " \"batch_size\": batch_size,\n", - " },\n", - " # Have to specify trainer_resources as 0 so that the example works on Colab. \n", - " scaling_config=ScalingConfig(num_workers=num_workers, use_gpu=use_gpu, trainer_resources={\"CPU\": 0}),\n", - " datasets={\"train\": combined_training_dataset},\n", - " preprocessor=mnist_normalize_preprocessor,\n", - " )\n", - "result = trainer.fit()\n", - "full_training_checkpoint = result.checkpoint" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "jLaOcmBddRqB" - }, - "source": [ - "Then, let's test model that was trained on all the tasks up front." - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "WC7zV_Cw9TAi", - "outputId": "12a86f2b-be90-47b6-e252-25e3199689f9" - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Map Progress (1 actors 1 pending): 0%| | 0/3 [00:01 \u001b[0m\u001b[32;49m23.1.2\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n" + ] + } + ], "source": [ "!pip install -qU \"ray[tune]\" xgboost_ray" ] }, { + "attachments": {}, "cell_type": "markdown", "id": "d2fe8d4a", "metadata": { @@ -56,27 +68,14 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 22, "id": "7232303d", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/ubuntu/ray/venv/lib/python3.8/site-packages/xgboost/compat.py:31: FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n", - " from pandas import MultiIndex, Int64Index\n", - "FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n", - "FutureWarning: pandas.Float64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n", - "FutureWarning: pandas.UInt64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n" - ] - } - ], + "outputs": [], "source": [ "from typing import Tuple\n", "\n", "import ray\n", - "from ray.train.batch_predictor import BatchPredictor\n", "from ray.train.xgboost import XGBoostPredictor\n", "from ray.train.xgboost import XGBoostTrainer\n", "from ray.air.config import ScalingConfig\n", @@ -86,6 +85,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "1c75b5ca", "metadata": { @@ -99,7 +99,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 23, "id": "37c4f38f", "metadata": {}, "outputs": [], @@ -112,6 +112,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "9b2850dd", "metadata": { @@ -125,7 +126,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 24, "id": "dae8998d", "metadata": { "pycharm": { @@ -163,6 +164,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "ce05af87", "metadata": {}, @@ -172,7 +174,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 25, "id": "5b8076d3", "metadata": { "pycharm": { @@ -181,26 +183,37 @@ }, "outputs": [], "source": [ + "import pandas as pd\n", + "from ray.air import Checkpoint\n", + "from ray.data import ActorPoolStrategy\n", + "\n", + "\n", + "class Predict:\n", + "\n", + " def __init__(self, checkpoint: Checkpoint):\n", + " self.predictor = XGBoostPredictor.from_checkpoint(checkpoint)\n", + "\n", + " def __call__(self, batch: pd.DataFrame) -> pd.DataFrame:\n", + " return self.predictor.predict(batch)\n", + "\n", + "\n", "def predict_xgboost(result: Result):\n", " _, _, test_dataset = prepare_data()\n", "\n", - " batch_predictor = BatchPredictor.from_checkpoint(\n", - " result.checkpoint, XGBoostPredictor\n", - " )\n", - "\n", - " predicted_labels = (\n", - " batch_predictor.predict(test_dataset)\n", - " .map_batches(lambda df: (df > 0.5).astype(int), batch_format=\"pandas\")\n", + " scores = test_dataset.map_batches(\n", + " Predict, \n", + " fn_constructor_args=[result.checkpoint], \n", + " compute=ActorPoolStrategy(), \n", + " batch_format=\"pandas\"\n", " )\n", + " \n", + " predicted_labels = scores.map_batches(lambda df: (df > 0.5).astype(int), batch_format=\"pandas\")\n", " print(f\"PREDICTED LABELS\")\n", - " predicted_labels.show()\n", - "\n", - " shap_values = batch_predictor.predict(test_dataset, pred_contribs=True)\n", - " print(f\"SHAP VALUES\")\n", - " shap_values.show()\n" + " predicted_labels.show()" ] }, { + "attachments": {}, "cell_type": "markdown", "id": "7e172f66", "metadata": {}, @@ -210,7 +223,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 26, "id": "0f96d62b", "metadata": { "pycharm": { @@ -218,26 +231,71 @@ } }, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2022-06-22 17:28:55,841\tINFO services.py:1477 -- View the Ray dashboard at \u001b[1m\u001b[32mhttp://127.0.0.1:8270\u001b[39m\u001b[22m\n", - "2022-06-22 17:28:58,044\tWARNING read_api.py:260 -- The number of blocks in this dataset (1) limits its parallelism to 1 concurrent tasks. This is much less than the number of available CPU slots in the cluster. Use `.repartition(n)` to increase the number of dataset blocks.\n", - "Map_Batches: 100%|██████████| 1/1 [00:00<00:00, 40.28it/s]\n" - ] - }, { "data": { "text/html": [ - "== Status ==
Current time: 2022-06-22 17:29:15 (running for 00:00:16.11)
Memory usage on this node: 11.5/31.0 GiB
Using FIFO scheduling algorithm.
Resources requested: 0/8 CPUs, 0/0 GPUs, 0.0/12.35 GiB heap, 0.0/6.18 GiB objects
Result logdir: /home/ubuntu/ray_results/XGBoostTrainer_2022-06-22_17-28-58
Number of trials: 1/1 (1 TERMINATED)
\n", + "
\n", + "
\n", + "
\n", + "

Tune Status

\n", + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
Current time:2023-07-06 18:33:25
Running for: 00:00:06.19
Memory: 14.9/64.0 GiB
\n", + "
\n", + "
\n", + "
\n", + "

System Info

\n", + " Using FIFO scheduling algorithm.
Logical resource usage: 2.0/10 CPUs, 0/0 GPUs\n", + "
\n", + " \n", + " \n", + "
\n", + "
\n", + "

Trial Status

\n", + " \n", "\n", - "\n", + "\n", "\n", "\n", - "\n", + "\n", "\n", - "
Trial name status loc iter total time (s) train-logloss train-error valid-logloss
Trial name status loc iter total time (s) train-logloss train-error valid-logloss
XGBoostTrainer_cc863_00000TERMINATED172.31.43.110:1493910 100 12.5164 0.005874 0 0.078188
XGBoostTrainer_40fed_00000TERMINATED127.0.0.1:40725 101 4.90132 0.00587595 0 0.06215


" + "\n", + "
\n", + "\n", + "\n" ], "text/plain": [ "" @@ -250,119 +308,95 @@ "name": "stderr", "output_type": "stream", "text": [ - "\u001b[2m\u001b[36m(pid=1493910)\u001b[0m /home/ubuntu/ray/venv/lib/python3.8/site-packages/xgboost/compat.py:31: FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n", - "\u001b[2m\u001b[36m(pid=1493910)\u001b[0m from pandas import MultiIndex, Int64Index\n", - "\u001b[2m\u001b[36m(pid=1493910)\u001b[0m FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n", - "\u001b[2m\u001b[36m(pid=1493910)\u001b[0m FutureWarning: pandas.Float64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n", - "\u001b[2m\u001b[36m(pid=1493910)\u001b[0m FutureWarning: pandas.UInt64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n", - "\u001b[2m\u001b[36m(XGBoostTrainer pid=1493910)\u001b[0m UserWarning: Dataset 'train' has 1 blocks, which is less than the `num_workers` 2. This dataset will be automatically repartitioned to 2 blocks.\n", - "\u001b[2m\u001b[36m(XGBoostTrainer pid=1493910)\u001b[0m UserWarning: Dataset 'valid' has 1 blocks, which is less than the `num_workers` 2. This dataset will be automatically repartitioned to 2 blocks.\n", - "\u001b[2m\u001b[36m(XGBoostTrainer pid=1493910)\u001b[0m 2022-06-22 17:29:04,073\tINFO main.py:980 -- [RayXGBoost] Created 2 new actors (2 total actors). Waiting until actors are ready for training.\n", - "\u001b[2m\u001b[36m(pid=1494007)\u001b[0m /home/ubuntu/ray/venv/lib/python3.8/site-packages/xgboost/compat.py:31: FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n", - "\u001b[2m\u001b[36m(pid=1494007)\u001b[0m from pandas import MultiIndex, Int64Index\n", - "\u001b[2m\u001b[36m(pid=1494008)\u001b[0m /home/ubuntu/ray/venv/lib/python3.8/site-packages/xgboost/compat.py:31: FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n", - "\u001b[2m\u001b[36m(pid=1494008)\u001b[0m from pandas import MultiIndex, Int64Index\n", - "\u001b[2m\u001b[36m(pid=1494009)\u001b[0m /home/ubuntu/ray/venv/lib/python3.8/site-packages/xgboost/compat.py:31: FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n", - "\u001b[2m\u001b[36m(pid=1494009)\u001b[0m from pandas import MultiIndex, Int64Index\n", - "\u001b[2m\u001b[36m(pid=1494007)\u001b[0m FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n", - "\u001b[2m\u001b[36m(pid=1494007)\u001b[0m FutureWarning: pandas.Float64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n", - "\u001b[2m\u001b[36m(pid=1494007)\u001b[0m FutureWarning: pandas.UInt64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n", - "\u001b[2m\u001b[36m(pid=1494008)\u001b[0m FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n", - "\u001b[2m\u001b[36m(pid=1494008)\u001b[0m FutureWarning: pandas.Float64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n", - "\u001b[2m\u001b[36m(pid=1494008)\u001b[0m FutureWarning: pandas.UInt64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n", - "\u001b[2m\u001b[36m(_RemoteRayXGBoostActor pid=1494008)\u001b[0m 2022-06-22 17:29:07,324\tWARNING __init__.py:190 -- DeprecationWarning: `ray.worker.get_resource_ids` is a private attribute and access will be removed in a future Ray version.\n", - "\u001b[2m\u001b[36m(pid=1494009)\u001b[0m FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n", - "\u001b[2m\u001b[36m(pid=1494009)\u001b[0m FutureWarning: pandas.Float64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n", - "\u001b[2m\u001b[36m(pid=1494009)\u001b[0m FutureWarning: pandas.UInt64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n", - "\u001b[2m\u001b[36m(_RemoteRayXGBoostActor pid=1494009)\u001b[0m 2022-06-22 17:29:07,421\tWARNING __init__.py:190 -- DeprecationWarning: `ray.worker.get_resource_ids` is a private attribute and access will be removed in a future Ray version.\n", - "\u001b[2m\u001b[36m(XGBoostTrainer pid=1493910)\u001b[0m 2022-06-22 17:29:07,874\tINFO main.py:1025 -- [RayXGBoost] Starting XGBoost training.\n", - "\u001b[2m\u001b[36m(_RemoteRayXGBoostActor pid=1494008)\u001b[0m [17:29:07] task [xgboost.ray]:139731353900128 got new rank 0\n", - "\u001b[2m\u001b[36m(_RemoteRayXGBoostActor pid=1494008)\u001b[0m FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n", - "\u001b[2m\u001b[36m(_RemoteRayXGBoostActor pid=1494009)\u001b[0m [17:29:07] task [xgboost.ray]:140076138558608 got new rank 1\n", - "\u001b[2m\u001b[36m(_RemoteRayXGBoostActor pid=1494009)\u001b[0m FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n", - "\u001b[2m\u001b[36m(_QueueActor pid=1494006)\u001b[0m /home/ubuntu/ray/venv/lib/python3.8/site-packages/xgboost/compat.py:31: FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n", - "\u001b[2m\u001b[36m(_QueueActor pid=1494006)\u001b[0m from pandas import MultiIndex, Int64Index\n", - "\u001b[2m\u001b[36m(_QueueActor pid=1494006)\u001b[0m FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n", - "\u001b[2m\u001b[36m(_QueueActor pid=1494006)\u001b[0m FutureWarning: pandas.Float64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n", - "\u001b[2m\u001b[36m(_QueueActor pid=1494006)\u001b[0m FutureWarning: pandas.UInt64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Result for XGBoostTrainer_cc863_00000:\n", - " date: 2022-06-22_17-29-09\n", - " done: false\n", - " experiment_id: dc3dac01a34043cfb5751907e2bc648e\n", - " hostname: ip-172-31-43-110\n", - " iterations_since_restore: 1\n", - " node_ip: 172.31.43.110\n", - " pid: 1493910\n", - " should_checkpoint: true\n", - " time_since_restore: 7.967940330505371\n", - " time_this_iter_s: 7.967940330505371\n", - " time_total_s: 7.967940330505371\n", - " timestamp: 1655918949\n", - " timesteps_since_restore: 0\n", - " train-error: 0.017588\n", - " train-logloss: 0.464648\n", - " training_iteration: 1\n", - " trial_id: cc863_00000\n", - " valid-error: 0.081871\n", - " valid-logloss: 0.496374\n", - " warmup_time: 0.004768848419189453\n", - " \n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[2m\u001b[36m(XGBoostTrainer pid=1493910)\u001b[0m 2022-06-22 17:29:14,546\tINFO main.py:1516 -- [RayXGBoost] Finished XGBoost training on training data with total N=398 in 10.52 seconds (6.66 pure XGBoost training time).\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Result for XGBoostTrainer_cc863_00000:\n", - " date: 2022-06-22_17-29-14\n", - " done: true\n", - " experiment_id: dc3dac01a34043cfb5751907e2bc648e\n", - " experiment_tag: '0'\n", - " hostname: ip-172-31-43-110\n", - " iterations_since_restore: 100\n", - " node_ip: 172.31.43.110\n", - " pid: 1493910\n", - " should_checkpoint: true\n", - " time_since_restore: 12.516392230987549\n", - " time_this_iter_s: 0.03008890151977539\n", - " time_total_s: 12.516392230987549\n", - " timestamp: 1655918954\n", - " timesteps_since_restore: 0\n", - " train-error: 0.0\n", - " train-logloss: 0.005874\n", - " training_iteration: 100\n", - " trial_id: cc863_00000\n", - " valid-error: 0.040936\n", - " valid-logloss: 0.078188\n", - " warmup_time: 0.004768848419189453\n", - " \n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2022-06-22 17:29:15,362\tINFO tune.py:734 -- Total run time: 16.94 seconds (16.08 seconds for the tuning loop).\n" + "\u001b[2m\u001b[36m(XGBoostTrainer pid=40725)\u001b[0m The `preprocessor` arg to Trainer is deprecated. Apply preprocessor transformations ahead of time by calling `preprocessor.transform(ds)`. Support for the preprocessor arg will be dropped in a future release.\n", + "\u001b[2m\u001b[36m(XGBoostTrainer pid=40725)\u001b[0m Tip: Use `take_batch()` instead of `take() / show()` to return records in pandas or numpy batch format.\n", + "\u001b[2m\u001b[36m(XGBoostTrainer pid=40725)\u001b[0m Executing DAG InputDataBuffer[Input] -> AllToAllOperator[Aggregate]\n", + "\u001b[2m\u001b[36m(XGBoostTrainer pid=40725)\u001b[0m Execution config: ExecutionOptions(resource_limits=ExecutionResources(cpu=None, gpu=None, object_store_memory=None), locality_with_output=False, preserve_order=False, actor_locality_enabled=True, verbose_progress=False)\n", + "\u001b[2m\u001b[36m(XGBoostTrainer pid=40725)\u001b[0m Tip: For detailed progress reporting, run `ray.data.DataContext.get_current().execution_options.verbose_progress = True`\n", + "\n", + "\u001b[A\n", + "\u001b[A\n", + "\n", + "\u001b[A\u001b[A\n", + "\n", + "(pid=40725) Running: 0.0/10.0 CPU, 0.0/0.0 GPU, 0.0 MiB/512.0 MiB object_store_memory: 0%| | 0/14 [00:00 TaskPoolMapOperator[MapBatches(StandardScaler._transform_pandas)]\n", + "\n", + "\u001b[A\n", + "\n", + "(pid=40725) Running: 0.0/10.0 CPU, 0.0/0.0 GPU, 0.0 MiB/512.0 MiB object_store_memory: 0%| | 0/14 [00:01 TaskPoolMapOperator[MapBatches(StandardScaler._transform_pandas)]\n", + "\n", + "\u001b[A\n", + "\n", + "(pid=40725) Running: 0.0/10.0 CPU, 0.0/0.0 GPU, 0.0 MiB/512.0 MiB object_store_memory: 0%| | 0/14 [00:01 ActorPoolMapOperator[MapBatches()->MapBatches(Predict)] -> TaskPoolMapOperator[MapBatches()]\n", + "2023-07-06 18:33:28,112\tINFO streaming_executor.py:93 -- Execution config: ExecutionOptions(resource_limits=ExecutionResources(cpu=None, gpu=None, object_store_memory=None), locality_with_output=False, preserve_order=False, actor_locality_enabled=True, verbose_progress=False)\n", + "2023-07-06 18:33:28,114\tINFO streaming_executor.py:95 -- Tip: For detailed progress reporting, run `ray.data.DataContext.get_current().execution_options.verbose_progress = True`\n", + "2023-07-06 18:33:28,150\tINFO actor_pool_map_operator.py:117 -- MapBatches()->MapBatches(Predict): Waiting for 1 pool actors to start...\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "PREDICTED LABELS\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " " ] }, { "name": "stdout", "output_type": "stream", "text": [ - "PREDICTED LABELS\n", "{'predictions': 1}\n", "{'predictions': 1}\n", "{'predictions': 0}\n", @@ -435,63 +478,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "Map_Batches: 0%| | 0/1 [00:00` or :ref:`Tuning `, use the trained model for scalable batch prediction with a ``BatchPredictor``. +After running the steps in :ref:`Training ` or +:ref:`Tuning `, use the trained model for scalable batch +prediction with :meth:`Dataset.map_batches() `. -.. tabs:: - - .. group-tab:: XGBoost - - .. literalinclude:: examples/xgboost_starter.py - :language: python - :start-after: __air_xgb_batchpred_start__ - :end-before: __air_xgb_batchpred_end__ - - .. group-tab:: Pytorch +To learn more, see :ref:`End-to-end: Offline Batch Inference `. - .. literalinclude:: examples/pytorch_tabular_starter.py - :language: python - :start-after: __air_pytorch_batchpred_start__ - :end-before: __air_pytorch_batchpred_end__ - - .. group-tab:: Tensorflow - - .. literalinclude:: examples/tf_tabular_starter.py - :language: python - :start-after: __air_tf_batchpred_start__ - :end-before: __air_tf_batchpred_end__ Project Status -------------- @@ -216,4 +198,4 @@ Next Steps - :ref:`air-examples-ref` - :ref:`API reference ` - :ref:`Technical whitepaper ` -- To check how your application is doing, you can use the :ref:`Ray dashboard`. +- To check how your application is doing, you can use the :ref:`Ray dashboard`. diff --git a/doc/source/ray-air/predictors.rst b/doc/source/ray-air/predictors.rst index 0656bcd4428a..6b87de6f3917 100644 --- a/doc/source/ray-air/predictors.rst +++ b/doc/source/ray-air/predictors.rst @@ -138,51 +138,6 @@ Here are some examples: :end-before: __configure_batch_predictor_scaling_end__ - -Batch Inference Examples ------------------------- -Below, we provide examples of using common frameworks to do batch inference for different data types: - -Tabular -~~~~~~~ - -.. tab-set:: - - .. tab-item:: XGBoost - - .. literalinclude:: examples/xgboost_batch_prediction.py - :language: python - - .. tab-item:: Pytorch - - .. literalinclude:: examples/pytorch_tabular_batch_prediction.py - :language: python - - .. tab-item:: Tensorflow - - .. literalinclude:: examples/tf_tabular_batch_prediction.py - :language: python - -Image -~~~~~ - -.. tab-set:: - - .. tab-item:: Pytorch - - .. literalinclude:: examples/torch_image_batch_pretrained.py - :language: python - - - .. tab-item:: Tensorflow - - Coming soon! - -Text -~~~~ - -Coming soon! - Developer Guide: Implementing your own Predictor ------------------------------------------------ diff --git a/doc/source/ray-overview/examples.rst b/doc/source/ray-overview/examples.rst index 1ab70176df2b..176e7a6f4876 100644 --- a/doc/source/ray-overview/examples.rst +++ b/doc/source/ray-overview/examples.rst @@ -161,13 +161,6 @@ Ray Examples How to fine-tune a DreamBooth text-to-image model with your own images. - .. grid-item-card:: :bdg-secondary:`Code example` - :class-item: gallery-item data-processing inference - - .. button-ref:: /ray-air/examples/opt_deepspeed_batch_inference - - How to run batch inference on a dataset of texts with a 30B OPT model - .. grid-item-card:: :bdg-secondary:`Code example` :class-item: gallery-item training diff --git a/doc/source/ray-overview/getting-started.md b/doc/source/ray-overview/getting-started.md index 8a0fce79f92a..6ff4267c9bbb 100644 --- a/doc/source/ray-overview/getting-started.md +++ b/doc/source/ray-overview/getting-started.md @@ -75,13 +75,10 @@ Run hyperparameter tuning with Ray Tune to find the best model: `````{dropdown} Use the trained model for Batch prediction -Use the trained model for batch prediction with a ``BatchPredictor``. +Use the trained model for batch prediction with +``Dataset.map_batches()``. -```{literalinclude} ../ray-air/examples/xgboost_starter.py - :language: python - :start-after: __air_xgb_batchpred_start__ - :end-before: __air_xgb_batchpred_end__ -``` +To learn more, see :ref:`End-to-end: Offline Batch Inference `. ```{button-ref} air :color: primary diff --git a/doc/source/train/examples/lightning/lightning_cola_advanced.ipynb b/doc/source/train/examples/lightning/lightning_cola_advanced.ipynb index 488532146c38..f6807aabd53b 100644 --- a/doc/source/train/examples/lightning/lightning_cola_advanced.ipynb +++ b/doc/source/train/examples/lightning/lightning_cola_advanced.ipynb @@ -11,7 +11,7 @@ "\n", ":::{note}\n", "\n", - "This is an advanced example for {class}`LightningTrainer `, which demonstrates how to use LightningTrainer with {ref}`Dataset ` and {ref}`Batch Predictor `. \n", + "This is an advanced example for {class}`LightningTrainer `, which demonstrates how to use LightningTrainer with {ref}`Dataset `. \n", "\n", "If you just want to quickly convert your existing PyTorch Lightning scripts into Ray AIR, you can refer to this starter example:\n", "{ref}`Train a Pytorch Lightning Image Classifier `.\n", @@ -22,9 +22,7 @@ "In particular, we will:\n", "- Create Ray Data from the original CoLA dataset.\n", "- Define a preprocessor to tokenize the sentences.\n", - "- Finetune a BERT model using LightningTrainer.\n", - "- Construct a BatchPredictor with the checkpoint and preprocessor.\n", - "- Do batch prediction on multiple GPUs, and evaluate the results." + "- Finetune a BERT model using LightningTrainer." ] }, { @@ -41,6 +39,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -57,6 +56,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -1369,121 +1369,6 @@ "result" ] }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Do Batch Inference with a Saved Checkpoint" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now that we have fine-tuned the module, we can load the checkpoint into a BatchPredictor and perform fast inference with multiple GPUs. It will distribute the inference workload across multiple workers when calling `predict()` and run prediction on multiple shards of data in parallel. \n", - "\n", - "You can find more details in [Using Predictors for Inference](air-predictors)." - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "from ray.train.batch_predictor import BatchPredictor\n", - "from ray.train.lightning import LightningCheckpoint, LightningPredictor\n", - "\n", - "# Use in-memory checkpoint object\n", - "checkpoint = result.checkpoint\n", - "\n", - "# You can also load a checkpoint from disk:\n", - "# YOUR_CHECKPOINT_DIR = result.checkpoint.path\n", - "# checkpoint = LightningCheckpoint.from_directory(YOUR_CHECKPOINT_DIR)\n", - "\n", - "batch_predictor = BatchPredictor(\n", - " checkpoint=checkpoint,\n", - " predictor_cls=LightningPredictor,\n", - " use_gpu=True,\n", - " model_class=SentimentModel,\n", - " preprocessor=preprocessor,\n", - ")\n", - "\n", - "# Use 2 GPUs for batch inference\n", - "predictions = batch_predictor.predict(\n", - " ray_datasets[\"validation\"],\n", - " feature_columns=[\"input_ids\", \"attention_mask\", \"label\"],\n", - " keep_columns=[\"label\"],\n", - " batch_size=16,\n", - " min_scoring_workers=2,\n", - " max_scoring_workers=2,\n", - " num_gpus_per_worker=1,\n", - ")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We obtained a Ray dataset containing predictions from `batch_predictor.predict()`. Now we can easily evaluate the results with just a few lines of code:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# Internally, BatchPredictor calls forward() method of the LightningModule.\n", - "# Convert the logits tensor into labels with argmax.\n", - "def argmax(batch):\n", - " batch[\"predictions\"] = batch[\"predictions\"].apply(lambda x: np.argmax(x))\n", - " return batch\n", - "\n", - "\n", - "results = predictions.map_batches(argmax, batch_format=\"pandas\").to_pandas()" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " predictions label\n", - "0 1 1\n", - "1 1 1\n", - "2 0 1\n", - "3 1 1\n", - "4 0 0\n", - "5 1 0\n", - "6 1 0\n", - "7 1 1\n", - "8 1 1\n", - "9 1 1\n", - "\n", - "{'matthews_correlation': 0.5899314497879129}\n" - ] - } - ], - "source": [ - "matthews_corr = metric.compute(\n", - " predictions=results[\"predictions\"], references=results[\"label\"]\n", - ")\n", - "print(results.head(10))\n", - "print(matthews_corr)" - ] - }, { "attachments": {}, "cell_type": "markdown", @@ -1493,7 +1378,8 @@ "\n", "- {ref}`Fine-tune a Large Language Model with LightningTrainer and FSDP `\n", "- {ref}`Hyperparameter searching with LightningTrainer + Ray Tune. `\n", - "- {ref}`Experiment Tracking with Wandb, CometML, MLFlow, and Tensorboard in LightningTrainer `" + "- {ref}`Experiment Tracking with Wandb, CometML, MLFlow, and Tensorboard in LightningTrainer `\n", + "- {ref}`End-to-end: Offline Batch Inference `" ] } ], diff --git a/python/ray/train/examples/huggingface/huggingface_basic_language_modeling_example.py b/python/ray/train/examples/huggingface/huggingface_basic_language_modeling_example.py index 78fd376676f7..548275819a76 100644 --- a/python/ray/train/examples/huggingface/huggingface_basic_language_modeling_example.py +++ b/python/ray/train/examples/huggingface/huggingface_basic_language_modeling_example.py @@ -6,7 +6,6 @@ import argparse import tempfile -import pandas as pd import torch from datasets import load_dataset from transformers import ( @@ -19,11 +18,7 @@ import ray import ray.data -from ray.train.batch_predictor import BatchPredictor -from ray.train.huggingface import ( - TransformersPredictor, - TransformersTrainer, -) +from ray.train.huggingface import TransformersTrainer from ray.air.config import ScalingConfig @@ -128,19 +123,6 @@ def train_function(train_dataset, eval_dataset=None, **config): results = trainer.fit() print(results.metrics) - tokenizer = AutoTokenizer.from_pretrained(tokenizer_checkpoint) - prompt = ["My text: Complete me..."] - predictor = BatchPredictor.from_checkpoint( - results.checkpoint, - TransformersPredictor, - task="text-generation", - tokenizer=tokenizer, - ) - data = ray.data.from_pandas(pd.DataFrame(prompt, columns=["prompt"])) - prediction = predictor.predict(data, num_gpus_per_worker=int(use_gpu)) - - print(f"Generated text for prompt '{prompt}': '{prediction.take(1)}'") - if __name__ == "__main__": # Training settings diff --git a/python/ray/train/examples/pytorch/torch_regression_example.py b/python/ray/train/examples/pytorch/torch_regression_example.py index 663f4af5a621..b49fe0558aff 100644 --- a/python/ray/train/examples/pytorch/torch_regression_example.py +++ b/python/ray/train/examples/pytorch/torch_regression_example.py @@ -1,7 +1,6 @@ import argparse from typing import Tuple -import numpy as np import pandas as pd from ray.air.checkpoint import Checkpoint @@ -11,10 +10,8 @@ import ray import ray.train as train from ray.air import session -from ray.air.result import Result from ray.data import Dataset -from ray.train.batch_predictor import BatchPredictor -from ray.train.torch import TorchPredictor, TorchTrainer +from ray.train.torch import TorchTrainer from ray.air.config import ScalingConfig @@ -126,19 +123,6 @@ def train_regression(num_workers=2, use_gpu=False): return result -def predict_regression(result: Result): - batch_predictor = BatchPredictor.from_checkpoint(result.checkpoint, TorchPredictor) - - df = pd.DataFrame( - [[np.random.uniform(0, 1, size=100)] for i in range(100)], columns=["x"] - ) - prediction_dataset = ray.data.from_pandas(df) - - predictions = batch_predictor.predict(prediction_dataset, dtype=torch.float) - - return predictions - - if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument( @@ -170,5 +154,4 @@ def predict_regression(result: Result): else: ray.init(address=args.address) result = train_regression(num_workers=args.num_workers, use_gpu=args.use_gpu) - predictions = predict_regression(result) - print(predictions.to_pandas()) + print(result) diff --git a/python/ray/train/examples/tf/tensorflow_autoencoder_example.py b/python/ray/train/examples/tf/tensorflow_autoencoder_example.py index c0a91307af6f..678ccaad2416 100644 --- a/python/ray/train/examples/tf/tensorflow_autoencoder_example.py +++ b/python/ray/train/examples/tf/tensorflow_autoencoder_example.py @@ -9,8 +9,6 @@ import tensorflow as tf import tensorflow_datasets as tfds from ray.data.datasource import SimpleTensorFlowDatasource -from ray.air.batch_predictor import BatchPredictor -from ray.air.predictors.integrations.tensorflow import TensorflowPredictor from ray.air.result import Result from ray.train.tensorflow import TensorflowTrainer from ray.train.tensorflow import prepare_dataset_shard @@ -137,61 +135,6 @@ def train_tensorflow_mnist( return results -def predict_tensorflow_mnist(result: Result) -> ray.data.Dataset: - test_dataset = get_dataset(split_type="test") - batch_predictor = BatchPredictor.from_checkpoint( - result.checkpoint, TensorflowPredictor, model_definition=build_autoencoder_model - ) - - predictions = batch_predictor.predict( - test_dataset, feature_columns=["image"], dtype=tf.float32 - ) - - pandas_predictions = predictions.to_pandas(float("inf")) - print(f"PREDICTIONS\n{pandas_predictions}") - - return pandas_predictions - - -def visualize_tensorflow_mnist_autoencoder(result: Result) -> None: - test_dataset = get_dataset(split_type="test") - batch_predictor = BatchPredictor.from_checkpoint( - result.checkpoint, TensorflowPredictor, model_definition=build_autoencoder_model - ) - - # test_dataset. - predictions = batch_predictor.predict( - test_dataset, feature_columns=["image"], dtype=tf.float32 - ) - - pandas_predictions = predictions.to_pandas(float("inf")) - - decoded_imgs = pandas_predictions["predictions"].values - x_test = test_dataset.to_pandas(float("inf"))["image"].values - - import matplotlib.pyplot as plt - - n = 10 # How many digits we will display - plt.figure(figsize=(20, 4)) - for i in range(n): - # Display original - ax = plt.subplot(2, n, i + 1) - plt.imshow(np.asarray(x_test[i]).reshape(28, 28)) - plt.gray() - ax.get_xaxis().set_visible(False) - ax.get_yaxis().set_visible(False) - - # Display reconstruction - ax = plt.subplot(2, n, i + 1 + n) - plt.imshow(np.asarray(decoded_imgs[i]).reshape(28, 28)) - plt.gray() - ax.get_xaxis().set_visible(False) - ax.get_yaxis().set_visible(False) - - # how to retrieve the folderpath of the checkpoint - plt.savefig("test.png") - - if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument( @@ -219,8 +162,6 @@ def visualize_tensorflow_mnist_autoencoder(result: Result) -> None: args, _ = parser.parse_known_args() - import ray - if args.smoke_test: # 2 workers, 1 for trainer, 1 for datasets num_gpus = args.num_workers if args.use_gpu else 0 @@ -231,6 +172,4 @@ def visualize_tensorflow_mnist_autoencoder(result: Result) -> None: result = train_tensorflow_mnist( num_workers=args.num_workers, use_gpu=args.use_gpu, epochs=args.epochs ) - - predict_tensorflow_mnist(result) - visualize_tensorflow_mnist_autoencoder(result) + print(result) diff --git a/python/ray/train/examples/tf/tensorflow_regression_example.py b/python/ray/train/examples/tf/tensorflow_regression_example.py index 5b130a9947b4..80aeafc208d1 100644 --- a/python/ray/train/examples/tf/tensorflow_regression_example.py +++ b/python/ray/train/examples/tf/tensorflow_regression_example.py @@ -1,20 +1,13 @@ import argparse -import numpy as np -import pandas as pd import tensorflow as tf import ray from ray.air import session from ray.air.integrations.keras import ReportCheckpointCallback from ray.air.result import Result -from ray.data import Dataset from ray.data.preprocessors import Concatenator -from ray.train.batch_predictor import BatchPredictor -from ray.train.tensorflow import ( - TensorflowPredictor, - TensorflowTrainer, -) +from ray.train.tensorflow import TensorflowTrainer from ray.air.config import ScalingConfig @@ -75,24 +68,6 @@ def train_tensorflow_regression(num_workers: int = 2, use_gpu: bool = False) -> return results -def predict_regression(result: Result) -> Dataset: - batch_predictor = BatchPredictor.from_checkpoint( - result.checkpoint, TensorflowPredictor, model_definition=build_model - ) - - df = pd.DataFrame( - [[np.random.uniform(0, 1, size=100)] for i in range(100)], columns=["x"] - ) - prediction_dataset = ray.data.from_pandas(df) - - predictions = batch_predictor.predict(prediction_dataset, dtype=tf.float32) - - print("PREDICTIONS") - predictions.show() - - return predictions - - if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument( @@ -127,4 +102,4 @@ def predict_regression(result: Result) -> Dataset: result = train_tensorflow_regression( num_workers=args.num_workers, use_gpu=args.use_gpu ) - predict_regression(result) + print(result) diff --git a/release/release_tests.yaml b/release/release_tests.yaml index 36320e7ea8a3..5e066064ed75 100644 --- a/release/release_tests.yaml +++ b/release/release_tests.yaml @@ -802,7 +802,7 @@ team: data python: "3.8" cluster: - byod: + byod: type: gpu runtime_env: - RAY_task_oom_retries=50 @@ -904,29 +904,6 @@ timeout: 4700 script: python test_myst_doc.py --path lightning-llm-finetuning-7b.ipynb - -- name: air_example_opt_deepspeed_batch_inference - group: AIR examples - working_dir: air_examples/opt_deepspeed_batch_inference - - python: "3.9" - - frequency: weekly - team: ml - cluster: - byod: - type: gpu - pip: - - myst-parser==0.15.2 - - myst-nb==0.13.1 - - jupytext==1.13.6 - cluster_env: 30b_deepspeed_env.yaml - cluster_compute: 30b_deepspeed_compute.yaml - - run: - timeout: 3600 - script: python test_myst_doc.py --path opt_deepspeed_batch_inference.ipynb - # variations: TODO(jungong): add GCP variation. @@ -941,7 +918,7 @@ frequency: nightly-3x team: data cluster: - byod: + byod: type: gpu cluster_env: ../testing/cluster_envs/default_cluster_env_nightly_ml_py39.yaml cluster_compute: ../testing/compute_configs/gpu/aws.yaml @@ -2330,7 +2307,7 @@ team: ml python: "3.8" cluster: - byod: + byod: type: gpu cluster_env: torch_tune_serve_app_config.yaml cluster_compute: gpu_tpl_aws.yaml @@ -2784,7 +2761,7 @@ team: serve python: "3.8" cluster: - byod: + byod: runtime_env: - RLLIB_TEST_NO_JAX_IMPORT=1 cluster_env: app_config.yaml @@ -2826,7 +2803,7 @@ team: serve python: "3.8" cluster: - byod: + byod: runtime_env: - RLLIB_TEST_NO_JAX_IMPORT=1 cluster_env: app_config.yaml @@ -2868,7 +2845,7 @@ team: serve python: "3.8" cluster: - byod: + byod: runtime_env: - RLLIB_TEST_NO_JAX_IMPORT=1 cluster_env: app_config.yaml @@ -3583,7 +3560,7 @@ team: ml python: "3.8" cluster: - byod: + byod: type: gpu post_build_script: byod_alpa_test.sh cluster_env: app_config.yaml @@ -3621,7 +3598,7 @@ team: ml python: "3.8" cluster: - byod: + byod: type: gpu post_build_script: byod_alpa_test.sh cluster_env: app_config.yaml @@ -4745,7 +4722,7 @@ team: rllib python: "3.8" cluster: - byod: + byod: type: gpu post_build_script: byod_rllib_test.sh runtime_env: @@ -4780,7 +4757,7 @@ team: rllib python: "3.8" cluster: - byod: + byod: type: gpu post_build_script: byod_rllib_test.sh runtime_env: @@ -5087,7 +5064,7 @@ group: core-daily-test working_dir: nightly_tests - stable: false + stable: false python: "3.8" frequency: nightly @@ -5140,7 +5117,7 @@ cluster: byod: runtime_env: - - RAY_MAX_LIMIT_FROM_API_SERVER=1000000000 + - RAY_MAX_LIMIT_FROM_API_SERVER=1000000000 - RAY_MAX_LIMIT_FROM_DATA_SOURCE=1000000000 cluster_env: shuffle/shuffle_with_state_api_app_config.yaml cluster_compute: shuffle/shuffle_compute_single.yaml @@ -5790,7 +5767,7 @@ team: data python: "3.8" cluster: - byod: + byod: type: gpu cluster_env: app_config.yaml cluster_compute: inference.yaml @@ -5845,7 +5822,7 @@ team: data python: "3.8" cluster: - byod: + byod: type: gpu cluster_env: app_config.yaml cluster_compute: single_node_benchmark_compute.yaml @@ -5904,7 +5881,7 @@ team: data python: "3.8" cluster: - byod: + byod: type: gpu cluster_env: app_config.yaml cluster_compute: data_ingest_benchmark_compute.yaml @@ -6208,7 +6185,7 @@ team: data python: "3.8" cluster: - byod: + byod: type: gpu cluster_env: app_config.yaml cluster_compute: single_node_benchmark_compute.yaml @@ -6870,4 +6847,4 @@ run: timeout: 2400 - script: python launch_and_verify_cluster.py gcp/example-full.yaml \ No newline at end of file + script: python launch_and_verify_cluster.py gcp/example-full.yaml