Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Alexej/eng 267 additional example run implementations #286

Merged
Merged
3 changes: 0 additions & 3 deletions examples/class_based_api/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ zenml example pull class_based_api
cd zenml_examples/class_based_api

# initialize
git init
zenml init
```

Expand All @@ -57,5 +56,3 @@ In order to clean up, delete the remaining zenml references.
```shell
rm -rf zenml_examples
```

Press next to start the first chapter!
20 changes: 19 additions & 1 deletion examples/class_based_api/chapter_1.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,34 @@

import os
from typing import List, Optional, Union
from urllib.request import urlopen

import pandas as pd

from zenml.core.repo import Repository
from zenml.logger import get_logger
from zenml.pipelines import BasePipeline
from zenml.steps.step_interfaces.base_datasource_step import (
BaseDatasourceConfig,
BaseDatasourceStep,
)

logger = get_logger(__name__)

DATASET_PATH = "diabetes.csv"
DATASET_SRC = (
"https://storage.googleapis.com/zenml-public-bucket/"
"pima-indians-diabetes/diabetes.csv"
)

# Download the dataset for this example (no-op when it is already on disk).
if not os.path.isfile(DATASET_PATH):
    logger.info(f"Downloading dataset {DATASET_PATH}")
    # Write the payload as raw bytes: a decode()/text-mode-write roundtrip
    # would re-encode with the locale default encoding and translate
    # newlines, which can silently alter the CSV on some platforms.
    with urlopen(DATASET_SRC) as data:
        content = data.read()
    with open(DATASET_PATH, "wb") as output:
        output.write(content)


class PandasDatasourceConfig(BaseDatasourceConfig):
path: str
Expand Down Expand Up @@ -58,7 +76,7 @@ def connect(


pipeline_instance = Chapter1Pipeline(
datasource=PandasDatasource(PandasDatasourceConfig(path=os.getenv("data")))
datasource=PandasDatasource(PandasDatasourceConfig(path=DATASET_PATH))
)

pipeline_instance.run()
Expand Down
20 changes: 19 additions & 1 deletion examples/class_based_api/chapter_2.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,30 @@
# or implied. See the License for the specific language governing
# permissions and limitations under the License.
import os
from urllib.request import urlopen

from zenml.core.repo import Repository
from zenml.integrations.sklearn import steps as sklearn_steps
from zenml.logger import get_logger
from zenml.pipelines import BasePipeline
from zenml.steps import builtin_steps, step_interfaces

logger = get_logger(__name__)

DATASET_PATH = "diabetes.csv"
DATASET_SRC = (
"https://storage.googleapis.com/zenml-public-bucket/"
"pima-indians-diabetes/diabetes.csv"
)

# Download the dataset for this example (no-op when it is already on disk).
if not os.path.isfile(DATASET_PATH):
    logger.info(f"Downloading dataset {DATASET_PATH}")
    # Write the payload as raw bytes: a decode()/text-mode-write roundtrip
    # would re-encode with the locale default encoding and translate
    # newlines, which can silently alter the CSV on some platforms.
    with urlopen(DATASET_SRC) as data:
        content = data.read()
    with open(DATASET_PATH, "wb") as output:
        output.write(content)


class Chapter2Pipeline(BasePipeline):
"""Class for Chapter 2 of the class-based API"""
Expand Down Expand Up @@ -51,7 +69,7 @@ def connect(
# Create an instance of the pipeline and run it
pipeline_instance = Chapter2Pipeline(
datasource=builtin_steps.PandasDatasource(
config=builtin_steps.PandasDatasourceConfig(path=os.getenv("data"))
config=builtin_steps.PandasDatasourceConfig(path=DATASET_PATH)
),
splitter=sklearn_steps.SklearnSplitter(
config=sklearn_steps.SklearnSplitterConfig(
Expand Down
23 changes: 21 additions & 2 deletions examples/class_based_api/chapter_3.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,35 @@
# permissions and limitations under the License.

import os
from urllib.request import urlopen

from zenml.core.repo import Repository
from zenml.integrations.sklearn import steps as sklearn_steps
from zenml.integrations.tensorflow import steps as tf_steps
from zenml.logger import get_logger
from zenml.pipelines.builtin_pipelines import TrainingPipeline
from zenml.steps import builtin_steps

logger = get_logger(__name__)

DATASET_PATH = "diabetes.csv"
DATASET_SRC = (
"https://storage.googleapis.com/zenml-public-bucket/"
"pima-indians-diabetes/diabetes.csv"
)

# Download the dataset for this example (no-op when it is already on disk).
if not os.path.isfile(DATASET_PATH):
    logger.info(f"Downloading dataset {DATASET_PATH}")
    # Write the payload as raw bytes: a decode()/text-mode-write roundtrip
    # would re-encode with the locale default encoding and translate
    # newlines, which can silently alter the CSV on some platforms.
    with urlopen(DATASET_SRC) as data:
        content = data.read()
    with open(DATASET_PATH, "wb") as output:
        output.write(content)


# Configuring the datasource
datasource = builtin_steps.PandasDatasource(
builtin_steps.PandasDatasourceConfig(path=os.getenv("data"))
builtin_steps.PandasDatasourceConfig(path=DATASET_PATH)
)

# Configuring the split step
Expand All @@ -45,7 +64,7 @@
# Configuring the training step
trainer = tf_steps.TensorflowBinaryClassifier(
tf_steps.TensorflowBinaryClassifierConfig(
target_column="has_diabetes", epochs=1
target_column="has_diabetes", epochs=10
)
)

Expand Down
13 changes: 13 additions & 0 deletions examples/class_based_api/setup.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#!/usr/bin/env bash

# -E: ERR traps are inherited by functions; -e: abort on first failing
# command; -o pipefail: a pipeline fails if any stage in it fails.
set -Eeo pipefail

# Install the integrations this example needs before it is run.
pre_run () {
  zenml integration install sklearn
  zenml integration install tensorflow
}

# Same as pre_run, but force reinstallation (-f) of the integrations.
pre_run_forced () {
  zenml integration install sklearn -f
  zenml integration install tensorflow -f
}
4 changes: 1 addition & 3 deletions examples/functional_api/chapter_7.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,7 @@

# Path to a pip requirements file that contains requirements necessary to run
# the pipeline
requirements_file = os.path.join(
os.path.dirname(__file__), "chapter_7_requirements.txt"
)
requirements_file = os.path.join(os.path.dirname(__file__), "requirements.txt")


class ImporterConfig(BaseStepConfig):
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
scikit-learn
pandas
numpy
requests

11 changes: 11 additions & 0 deletions examples/functional_api/setup.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#!/usr/bin/env bash

# -E: ERR traps are inherited by functions; -e: abort on first failing
# command; -o pipefail: a pipeline fails if any stage in it fails.
set -Eeo pipefail

# Install the integration this example needs before it is run.
pre_run () {
  zenml integration install sklearn
}

# Same as pre_run, but force reinstallation (-f) of the integration.
pre_run_forced () {
  zenml integration install sklearn -f
}
Binary file removed examples/legacy/backends/graphics/architecture.png
Binary file not shown.
1 change: 0 additions & 1 deletion examples/standard_interfaces/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@ zenml example pull standard_interfaces
cd zenml_examples/standard_interfaces

# initialize
git init
schustmi marked this conversation as resolved.
Show resolved Hide resolved
zenml init
```

Expand Down
20 changes: 19 additions & 1 deletion examples/standard_interfaces/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,33 @@
# or implied. See the License for the specific language governing
# permissions and limitations under the License.
import os
from urllib.request import urlopen

from zenml.integrations.sklearn import steps as sklearn_steps
from zenml.integrations.tensorflow import steps as tf_steps
from zenml.logger import get_logger
from zenml.pipelines.builtin_pipelines import TrainingPipeline
from zenml.steps import builtin_steps

logger = get_logger(__name__)

DATASET_PATH = "diabetes.csv"
DATASET_SRC = (
"https://storage.googleapis.com/zenml-public-bucket/"
"pima-indians-diabetes/diabetes.csv"
)

# Download the dataset for this example (no-op when it is already on disk).
if not os.path.isfile(DATASET_PATH):
    logger.info(f"Downloading dataset {DATASET_PATH}")
    # Write the payload as raw bytes: a decode()/text-mode-write roundtrip
    # would re-encode with the locale default encoding and translate
    # newlines, which can silently alter the CSV on some platforms.
    with urlopen(DATASET_SRC) as data:
        content = data.read()
    with open(DATASET_PATH, "wb") as output:
        output.write(content)

# Configuring the datasource
datasource = builtin_steps.PandasDatasource(
builtin_steps.PandasDatasourceConfig(path=os.getenv("test_data"))
builtin_steps.PandasDatasourceConfig(path=DATASET_PATH)
)

# Configuring the split step
Expand Down
4 changes: 0 additions & 4 deletions examples/standard_interfaces/setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,11 @@
set -Eeo pipefail

pre_run () {
# "Currently run is not implemented for standard_interfaces due to manual loading of csv file!" -> exit code 42
exit 38
zenml integration install sklearn
zenml integration install tensorflow
}

pre_run_forced () {
# "Currently run is not implemented for standard_interfaces due to manual loading of csv file!" -> exit code 42
exit 38
zenml integration install sklearn -f
zenml integration install tensorflow -f
}
9 changes: 4 additions & 5 deletions src/zenml/cli/example.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,11 +91,11 @@ def executable_python_example(self) -> str:
if self.has_single_python_file:
return self.python_files_in_dir[0]
elif self.has_any_python_file:
raise RuntimeError(
"Unclear which python file to return for "
f"example {self.name}."
f"{self.python_files_in_dir}"
logger.warning(
"This example has multiple executable python files"
"The last one in alphanumerical order is taken."
)
return sorted(self.python_files_in_dir)[-1]
else:
raise RuntimeError(
"No pipeline runner script found in example. "
Expand All @@ -120,7 +120,6 @@ def run_example(self, bash_file: str, force: bool) -> None:
os.chdir(self.path)
try:
# TODO [ENG-271]: Catch errors that might be thrown in subprocess
declare(str(self.path))
if force:
subprocess.check_call(
[
Expand Down