diff --git a/examples/class_based_api/README.md b/examples/class_based_api/README.md index d2c0e06075e..38d8fd4abab 100644 --- a/examples/class_based_api/README.md +++ b/examples/class_based_api/README.md @@ -33,7 +33,6 @@ zenml example pull class_based_api cd zenml_examples/class_based_api # initialize -git init zenml init ``` @@ -57,5 +56,3 @@ In order to clean up, delete the remaining zenml references. ```shell rm -rf zenml_examples ``` - -Press next to start the first chapter! diff --git a/examples/class_based_api/chapter_1.py b/examples/class_based_api/chapter_1.py index f69897378c2..ab94916c074 100644 --- a/examples/class_based_api/chapter_1.py +++ b/examples/class_based_api/chapter_1.py @@ -14,16 +14,34 @@ import os from typing import List, Optional, Union +from urllib.request import urlopen import pandas as pd from zenml.core.repo import Repository +from zenml.logger import get_logger from zenml.pipelines import BasePipeline from zenml.steps.step_interfaces.base_datasource_step import ( BaseDatasourceConfig, BaseDatasourceStep, ) +logger = get_logger(__name__) + +DATASET_PATH = "diabetes.csv" +DATASET_SRC = ( + "https://storage.googleapis.com/zenml-public-bucket/" + "pima-indians-diabetes/diabetes.csv" +) + +# Download the dataset for this example +if not os.path.isfile(DATASET_PATH): + logger.info(f"Downloading dataset {DATASET_PATH}") + with urlopen(DATASET_SRC) as data: + content = data.read().decode() + with open(DATASET_PATH, "w") as output: + output.write(content) + class PandasDatasourceConfig(BaseDatasourceConfig): path: str @@ -58,7 +76,7 @@ def connect( pipeline_instance = Chapter1Pipeline( - datasource=PandasDatasource(PandasDatasourceConfig(path=os.getenv("data"))) + datasource=PandasDatasource(PandasDatasourceConfig(path=DATASET_PATH)) ) pipeline_instance.run() diff --git a/examples/class_based_api/chapter_2.py b/examples/class_based_api/chapter_2.py index e20ae286247..0f3395ad748 100644 --- a/examples/class_based_api/chapter_2.py +++ 
b/examples/class_based_api/chapter_2.py @@ -12,12 +12,30 @@ # or implied. See the License for the specific language governing # permissions and limitations under the License. import os +from urllib.request import urlopen from zenml.core.repo import Repository from zenml.integrations.sklearn import steps as sklearn_steps +from zenml.logger import get_logger from zenml.pipelines import BasePipeline from zenml.steps import builtin_steps, step_interfaces +logger = get_logger(__name__) + +DATASET_PATH = "diabetes.csv" +DATASET_SRC = ( + "https://storage.googleapis.com/zenml-public-bucket/" + "pima-indians-diabetes/diabetes.csv" +) + +# Download the dataset for this example +if not os.path.isfile(DATASET_PATH): + logger.info(f"Downloading dataset {DATASET_PATH}") + with urlopen(DATASET_SRC) as data: + content = data.read().decode() + with open(DATASET_PATH, "w") as output: + output.write(content) + class Chapter2Pipeline(BasePipeline): """Class for Chapter 2 of the class-based API""" @@ -51,7 +69,7 @@ def connect( # Create an instance of the pipeline and run it pipeline_instance = Chapter2Pipeline( datasource=builtin_steps.PandasDatasource( - config=builtin_steps.PandasDatasourceConfig(path=os.getenv("data")) + config=builtin_steps.PandasDatasourceConfig(path=DATASET_PATH) ), splitter=sklearn_steps.SklearnSplitter( config=sklearn_steps.SklearnSplitterConfig( diff --git a/examples/class_based_api/chapter_3.py b/examples/class_based_api/chapter_3.py index 7fa8208649c..269038e0e17 100644 --- a/examples/class_based_api/chapter_3.py +++ b/examples/class_based_api/chapter_3.py @@ -13,16 +13,35 @@ # permissions and limitations under the License. 
import os +from urllib.request import urlopen from zenml.core.repo import Repository from zenml.integrations.sklearn import steps as sklearn_steps from zenml.integrations.tensorflow import steps as tf_steps +from zenml.logger import get_logger from zenml.pipelines.builtin_pipelines import TrainingPipeline from zenml.steps import builtin_steps +logger = get_logger(__name__) + +DATASET_PATH = "diabetes.csv" +DATASET_SRC = ( + "https://storage.googleapis.com/zenml-public-bucket/" + "pima-indians-diabetes/diabetes.csv" +) + +# Download the dataset for this example +if not os.path.isfile(DATASET_PATH): + logger.info(f"Downloading dataset {DATASET_PATH}") + with urlopen(DATASET_SRC) as data: + content = data.read().decode() + with open(DATASET_PATH, "w") as output: + output.write(content) + + # Configuring the datasource datasource = builtin_steps.PandasDatasource( - builtin_steps.PandasDatasourceConfig(path=os.getenv("data")) + builtin_steps.PandasDatasourceConfig(path=DATASET_PATH) ) # Configuring the split step @@ -45,7 +64,7 @@ # Configuring the training step trainer = tf_steps.TensorflowBinaryClassifier( tf_steps.TensorflowBinaryClassifierConfig( - target_column="has_diabetes", epochs=1 + target_column="has_diabetes", epochs=10 ) ) diff --git a/examples/class_based_api/setup.sh b/examples/class_based_api/setup.sh new file mode 100644 index 00000000000..96916b5df8a --- /dev/null +++ b/examples/class_based_api/setup.sh @@ -0,0 +1,13 @@ +#!/usr/bin/env bash + +set -Eeo pipefail + +pre_run () { + zenml integration install sklearn + zenml integration install tensorflow +} + +pre_run_forced () { + zenml integration install sklearn -f + zenml integration install tensorflow -f +} \ No newline at end of file diff --git a/examples/functional_api/chapter_7.py b/examples/functional_api/chapter_7.py index 12b37cb209c..0bd2948c1a3 100644 --- a/examples/functional_api/chapter_7.py +++ b/examples/functional_api/chapter_7.py @@ -24,9 +24,7 @@ # Path to a pip requirements file that 
contains requirements necessary to run # the pipeline -requirements_file = os.path.join( - os.path.dirname(__file__), "chapter_7_requirements.txt" -) +requirements_file = os.path.join(os.path.dirname(__file__), "requirements.txt") class ImporterConfig(BaseStepConfig): diff --git a/examples/functional_api/chapter_7_requirements.txt b/examples/functional_api/requirements.txt similarity index 62% rename from examples/functional_api/chapter_7_requirements.txt rename to examples/functional_api/requirements.txt index 56cd5ef6e81..6008bc2ea42 100644 --- a/examples/functional_api/chapter_7_requirements.txt +++ b/examples/functional_api/requirements.txt @@ -1,4 +1,4 @@ -scikit-learn pandas numpy requests + diff --git a/examples/functional_api/setup.sh b/examples/functional_api/setup.sh new file mode 100644 index 00000000000..4dd4a0938a7 --- /dev/null +++ b/examples/functional_api/setup.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash + +set -Eeo pipefail + +pre_run () { + zenml integration install sklearn +} + +pre_run_forced () { + zenml integration install sklearn -f +} \ No newline at end of file diff --git a/examples/legacy/backends/graphics/architecture.png b/examples/legacy/backends/graphics/architecture.png deleted file mode 100644 index 140ec450c91..00000000000 Binary files a/examples/legacy/backends/graphics/architecture.png and /dev/null differ diff --git a/examples/standard_interfaces/README.md b/examples/standard_interfaces/README.md index d36d0fa5b07..698ea7f2473 100644 --- a/examples/standard_interfaces/README.md +++ b/examples/standard_interfaces/README.md @@ -43,7 +43,6 @@ zenml example pull standard_interfaces cd zenml_examples/standard_interfaces # initialize -git init zenml init ``` diff --git a/examples/standard_interfaces/run.py b/examples/standard_interfaces/run.py index d0fb7d1100a..91d8cf29bcd 100644 --- a/examples/standard_interfaces/run.py +++ b/examples/standard_interfaces/run.py @@ -12,15 +12,33 @@ # or implied. 
See the License for the specific language governing # permissions and limitations under the License. import os +from urllib.request import urlopen from zenml.integrations.sklearn import steps as sklearn_steps from zenml.integrations.tensorflow import steps as tf_steps +from zenml.logger import get_logger from zenml.pipelines.builtin_pipelines import TrainingPipeline from zenml.steps import builtin_steps +logger = get_logger(__name__) + +DATASET_PATH = "diabetes.csv" +DATASET_SRC = ( + "https://storage.googleapis.com/zenml-public-bucket/" + "pima-indians-diabetes/diabetes.csv" +) + +# Download the dataset for this example +if not os.path.isfile(DATASET_PATH): + logger.info(f"Downloading dataset {DATASET_PATH}") + with urlopen(DATASET_SRC) as data: + content = data.read().decode() + with open(DATASET_PATH, "w") as output: + output.write(content) + # Configuring the datasource datasource = builtin_steps.PandasDatasource( - builtin_steps.PandasDatasourceConfig(path=os.getenv("test_data")) + builtin_steps.PandasDatasourceConfig(path=DATASET_PATH) ) # Configuring the split step diff --git a/examples/standard_interfaces/setup.sh b/examples/standard_interfaces/setup.sh index 86de512400f..96916b5df8a 100644 --- a/examples/standard_interfaces/setup.sh +++ b/examples/standard_interfaces/setup.sh @@ -3,15 +3,11 @@ set -Eeo pipefail pre_run () { - # "Currently run is not implemented for standard_interfaces due to manual loading of csv file!" -> exit code 42 - exit 38 zenml integration install sklearn zenml integration install tensorflow } pre_run_forced () { - # "Currently run is not implemented for standard_interfaces due to manual loading of csv file!" 
-> exit code 42 - exit 38 zenml integration install sklearn -f zenml integration install tensorflow -f } \ No newline at end of file diff --git a/src/zenml/cli/example.py b/src/zenml/cli/example.py index d36264dab42..28ab176944c 100644 --- a/src/zenml/cli/example.py +++ b/src/zenml/cli/example.py @@ -91,11 +91,11 @@ def executable_python_example(self) -> str: if self.has_single_python_file: return self.python_files_in_dir[0] elif self.has_any_python_file: - raise RuntimeError( - "Unclear which python file to return for " - f"example {self.name}." - f"{self.python_files_in_dir}" + logger.warning( + "This example has multiple executable python files. " + "The last one in alphanumerical order is taken." + ) + return sorted(self.python_files_in_dir)[-1] else: raise RuntimeError( "No pipeline runner script found in example. " @@ -120,7 +120,6 @@ def run_example(self, bash_file: str, force: bool) -> None: os.chdir(self.path) try: # TODO [ENG-271]: Catch errors that might be thrown in subprocess - declare(str(self.path)) if force: subprocess.check_call( [