Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Alexej/eng 267 additional example run implementations #286

Merged
Merged
3 changes: 0 additions & 3 deletions examples/class_based_api/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ zenml example pull class_based_api
cd zenml_examples/class_based_api

# initialize
git init
zenml init
```

Expand All @@ -57,5 +56,3 @@ In order to clean up, delete the remaining zenml references.
```shell
rm -rf zenml_examples
```

Press next to start the first chapter!
20 changes: 19 additions & 1 deletion examples/class_based_api/chapter_1.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,34 @@

import os
from typing import List, Optional, Union
from urllib.request import urlopen

import pandas as pd

from zenml.core.repo import Repository
from zenml.logger import get_logger
from zenml.pipelines import BasePipeline
from zenml.steps.step_interfaces.base_datasource_step import (
BaseDatasourceConfig,
BaseDatasourceStep,
)

logger = get_logger(__name__)

DATASET_PATH = "diabetes.csv"
DATASET_SRC = (
"https://storage.googleapis.com/zenml-public-bucket/"
"pima-indians-diabetes/diabetes.csv"
)

# Download the dataset for this example (no-op when it is already on disk).
if not os.path.isfile(DATASET_PATH):
    logger.info(f"Downloading dataset {DATASET_PATH}")
    # Write the payload as raw bytes: a decode()/text-mode-write roundtrip
    # would re-encode with the locale default encoding and translate
    # newlines, which can silently alter the CSV on some platforms.
    with urlopen(DATASET_SRC) as data:
        content = data.read()
    with open(DATASET_PATH, "wb") as output:
        output.write(content)


class PandasDatasourceConfig(BaseDatasourceConfig):
path: str
Expand Down Expand Up @@ -58,7 +76,7 @@ def connect(


pipeline_instance = Chapter1Pipeline(
datasource=PandasDatasource(PandasDatasourceConfig(path=os.getenv("data")))
datasource=PandasDatasource(PandasDatasourceConfig(path=DATASET_PATH))
)

pipeline_instance.run()
Expand Down
20 changes: 19 additions & 1 deletion examples/class_based_api/chapter_2.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,30 @@
# or implied. See the License for the specific language governing
# permissions and limitations under the License.
import os
from urllib.request import urlopen

from zenml.core.repo import Repository
from zenml.integrations.sklearn import steps as sklearn_steps
from zenml.logger import get_logger
from zenml.pipelines import BasePipeline
from zenml.steps import builtin_steps, step_interfaces

logger = get_logger(__name__)

DATASET_PATH = "diabetes.csv"
DATASET_SRC = (
"https://storage.googleapis.com/zenml-public-bucket/"
"pima-indians-diabetes/diabetes.csv"
)

# Download the dataset for this example (no-op when it is already on disk).
if not os.path.isfile(DATASET_PATH):
    logger.info(f"Downloading dataset {DATASET_PATH}")
    # Write the payload as raw bytes: a decode()/text-mode-write roundtrip
    # would re-encode with the locale default encoding and translate
    # newlines, which can silently alter the CSV on some platforms.
    with urlopen(DATASET_SRC) as data:
        content = data.read()
    with open(DATASET_PATH, "wb") as output:
        output.write(content)


class Chapter2Pipeline(BasePipeline):
"""Class for Chapter 2 of the class-based API"""
Expand Down Expand Up @@ -51,7 +69,7 @@ def connect(
# Create an instance of the pipeline and run it
pipeline_instance = Chapter2Pipeline(
datasource=builtin_steps.PandasDatasource(
config=builtin_steps.PandasDatasourceConfig(path=os.getenv("data"))
config=builtin_steps.PandasDatasourceConfig(path=DATASET_PATH)
),
splitter=sklearn_steps.SklearnSplitter(
config=sklearn_steps.SklearnSplitterConfig(
Expand Down
23 changes: 21 additions & 2 deletions examples/class_based_api/chapter_3.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,35 @@
# permissions and limitations under the License.

import os
from urllib.request import urlopen

from zenml.core.repo import Repository
from zenml.integrations.sklearn import steps as sklearn_steps
from zenml.integrations.tensorflow import steps as tf_steps
from zenml.logger import get_logger
from zenml.pipelines.builtin_pipelines import TrainingPipeline
from zenml.steps import builtin_steps

logger = get_logger(__name__)

DATASET_PATH = "diabetes.csv"
DATASET_SRC = (
"https://storage.googleapis.com/zenml-public-bucket/"
"pima-indians-diabetes/diabetes.csv"
)

# Download the dataset for this example (no-op when it is already on disk).
if not os.path.isfile(DATASET_PATH):
    logger.info(f"Downloading dataset {DATASET_PATH}")
    # Write the payload as raw bytes: a decode()/text-mode-write roundtrip
    # would re-encode with the locale default encoding and translate
    # newlines, which can silently alter the CSV on some platforms.
    with urlopen(DATASET_SRC) as data:
        content = data.read()
    with open(DATASET_PATH, "wb") as output:
        output.write(content)


# Configuring the datasource
datasource = builtin_steps.PandasDatasource(
builtin_steps.PandasDatasourceConfig(path=os.getenv("data"))
builtin_steps.PandasDatasourceConfig(path=DATASET_PATH)
)

# Configuring the split step
Expand All @@ -45,7 +64,7 @@
# Configuring the training step
trainer = tf_steps.TensorflowBinaryClassifier(
tf_steps.TensorflowBinaryClassifierConfig(
target_column="has_diabetes", epochs=1
target_column="has_diabetes", epochs=10
)
)

Expand Down
13 changes: 13 additions & 0 deletions examples/class_based_api/setup.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#!/usr/bin/env bash

# -E: ERR traps are inherited by functions; -e: abort on first failing
# command; -o pipefail: a pipeline fails if any stage in it fails.
set -Eeo pipefail

# Install the integrations this example needs before it is run.
pre_run () {
  zenml integration install sklearn
  zenml integration install tensorflow
}

# Same as pre_run, but force reinstallation (-f) of the integrations.
pre_run_forced () {
  zenml integration install sklearn -f
  zenml integration install tensorflow -f
}
4 changes: 1 addition & 3 deletions examples/functional_api/chapter_7.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,7 @@

# Path to a pip requirements file that contains requirements necessary to run
# the pipeline
requirements_file = os.path.join(
os.path.dirname(__file__), "chapter_7_requirements.txt"
)
requirements_file = os.path.join(os.path.dirname(__file__), "requirements.txt")


class ImporterConfig(BaseStepConfig):
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
scikit-learn
pandas
numpy
requests

11 changes: 11 additions & 0 deletions examples/functional_api/setup.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#!/usr/bin/env bash

# -E: ERR traps are inherited by functions; -e: abort on first failing
# command; -o pipefail: a pipeline fails if any stage in it fails.
set -Eeo pipefail

# Install the integration this example needs before it is run.
pre_run () {
  zenml integration install sklearn
}

# Same as pre_run, but force reinstallation (-f) of the integration.
pre_run_forced () {
  zenml integration install sklearn -f
}
Binary file removed examples/legacy/backends/graphics/architecture.png
Binary file not shown.
1 change: 0 additions & 1 deletion examples/standard_interfaces/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@ zenml example pull standard_interfaces
cd zenml_examples/standard_interfaces

# initialize
git init
schustmi marked this conversation as resolved.
Show resolved Hide resolved
zenml init
```

Expand Down
20 changes: 19 additions & 1 deletion examples/standard_interfaces/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,33 @@
# or implied. See the License for the specific language governing
# permissions and limitations under the License.
import os
from urllib.request import urlopen

from zenml.integrations.sklearn import steps as sklearn_steps
from zenml.integrations.tensorflow import steps as tf_steps
from zenml.logger import get_logger
from zenml.pipelines.builtin_pipelines import TrainingPipeline
from zenml.steps import builtin_steps

logger = get_logger(__name__)

DATASET_PATH = "diabetes.csv"
DATASET_SRC = (
"https://storage.googleapis.com/zenml-public-bucket/"
"pima-indians-diabetes/diabetes.csv"
)

# Download the dataset for this example (no-op when it is already on disk).
if not os.path.isfile(DATASET_PATH):
    logger.info(f"Downloading dataset {DATASET_PATH}")
    # Write the payload as raw bytes: a decode()/text-mode-write roundtrip
    # would re-encode with the locale default encoding and translate
    # newlines, which can silently alter the CSV on some platforms.
    with urlopen(DATASET_SRC) as data:
        content = data.read()
    with open(DATASET_PATH, "wb") as output:
        output.write(content)

# Configuring the datasource
datasource = builtin_steps.PandasDatasource(
builtin_steps.PandasDatasourceConfig(path=os.getenv("test_data"))
builtin_steps.PandasDatasourceConfig(path=DATASET_PATH)
)

# Configuring the split step
Expand Down
4 changes: 0 additions & 4 deletions examples/standard_interfaces/setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,11 @@
set -Eeo pipefail

pre_run () {
# "Currently run is not implemented for standard_interfaces due to manual loading of csv file!" -> exit code 42
exit 38
zenml integration install sklearn
zenml integration install tensorflow
}

pre_run_forced () {
# "Currently run is not implemented for standard_interfaces due to manual loading of csv file!" -> exit code 42
exit 38
zenml integration install sklearn -f
zenml integration install tensorflow -f
}
9 changes: 4 additions & 5 deletions src/zenml/cli/example.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,11 +91,11 @@ def executable_python_example(self) -> str:
if self.has_single_python_file:
return self.python_files_in_dir[0]
elif self.has_any_python_file:
raise RuntimeError(
"Unclear which python file to return for "
f"example {self.name}."
f"{self.python_files_in_dir}"
logger.warning(
"This example has multiple executable python files"
"The last one in alphanumerical order is taken."
)
return sorted(self.python_files_in_dir)[-1]
else:
raise RuntimeError(
"No pipeline runner script found in example. "
Expand All @@ -120,7 +120,6 @@ def run_example(self, bash_file: str, force: bool) -> None:
os.chdir(self.path)
try:
# TODO [ENG-271]: Catch errors that might be thrown in subprocess
declare(str(self.path))
if force:
subprocess.check_call(
[
Expand Down