Skip to content

Commit

Permalink
🐛 Enable using pip-requirements in modelify command and use environment's package versions as default conda env (#405)
Browse files Browse the repository at this point in the history
  • Loading branch information
Galileo-Galilei committed Feb 12, 2023
1 parent d5fe84b commit 5be41ef
Show file tree
Hide file tree
Showing 3 changed files with 220 additions and 55 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@

- :memo: Update incorrect documentation about model registry with local relative filepath ([#400](https://github.com/Galileo-Galilei/kedro-mlflow/issues/400))

- :bug: The ``modelify`` command now creates a conda environment based on your environment's python and kedro versions instead of the hardcoded ``python=3.7`` and ``kedro=0.16.5`` ([#405](https://github.com/Galileo-Galilei/kedro-mlflow/issues/405))

- :bug: The ``modelify`` command now correctly uses the ``--pip-requirements`` argument instead of raising an error ([#405](https://github.com/Galileo-Galilei/kedro-mlflow/issues/405))

## [0.11.7] - 2023-01-28

### Added
Expand Down
26 changes: 19 additions & 7 deletions kedro_mlflow/framework/cli/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,13 @@
import webbrowser
from logging import getLogger
from pathlib import Path
from platform import python_version
from tempfile import TemporaryDirectory
from typing import Dict, Optional, Union

import click
import mlflow
from kedro import __version__ as kedro_version
from kedro.framework.project import pipelines, settings
from kedro.framework.session import KedroSession
from kedro.framework.startup import _is_project, bootstrap_project
Expand Down Expand Up @@ -289,13 +291,14 @@ def modelify(
copy_mode: Optional[Union[str, Dict[str, str]]],
artifact_path: str,
code_path: str,
conda_env: str,
conda_env: Optional[str],
registered_model_name: str,
await_registration_for: int,
pip_requirements: str,
extra_pip_requirements: str,
pip_requirements: Optional[str],
extra_pip_requirements: Optional[str],
):
"""Export a kedro pipeline as a mlflow model for serving"""

# if the command is available, we are necessarily at the root of a kedro project

project_path = Path.cwd()
Expand Down Expand Up @@ -343,24 +346,33 @@ def modelify(

artifacts = kedro_pipeline_model.extract_pipeline_artifacts(Path(tmp_dir))

if conda_env is None:
conda_env = {"python": "3.7.0", "dependencies": ["kedro==0.16.5"]}

log_model_kwargs = dict(
artifact_path=artifact_path,
python_model=kedro_pipeline_model,
artifacts=artifacts,
code_path=code_path,
conda_env=conda_env,
signature=model_signature,
input_example=input_example,
registered_model_name=registered_model_name,
await_registration_for=await_registration_for,
)

if version.parse(f"{mlflow.__version__}") >= version.parse("1.20.0"):
log_model_kwargs["pip_requirements"] = pip_requirements
log_model_kwargs["extra_pip_requirements"] = extra_pip_requirements

if (
(conda_env is None)
and (pip_requirements is None)
and (extra_pip_requirements is None)
):
conda_env = {
"python": python_version(),
"dependencies": [f"kedro=={kedro_version}"],
}

log_model_kwargs["conda_env"] = conda_env

with mlflow.start_run(run_id=run_id):
mlflow.pyfunc.log_model(**log_model_kwargs)
run_id = mlflow.active_run().info.run_id
Expand Down
245 changes: 197 additions & 48 deletions tests/framework/cli/test_cli_modelify.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import re
import shutil
from pathlib import Path
from platform import python_version

import mlflow
import pandas as pd
Expand Down Expand Up @@ -298,14 +299,12 @@ def test_modelify_with_artifact_path_arg(monkeypatch, kp_for_modelify):
catalog = context.catalog
catalog.save("trained_model", 2)

runs_id_set_before_cmd = set(
[
run.info.run_id
for run in context.mlflow.server._mlflow_client.search_runs(
context.mlflow.tracking.experiment._experiment.experiment_id
)
]
)
runs_id_set_before_cmd = {
run.info.run_id
for run in context.mlflow.server._mlflow_client.search_runs(
context.mlflow.tracking.experiment._experiment.experiment_id
)
}

result = cli_runner.invoke(
cli_modelify,
Expand All @@ -319,14 +318,12 @@ def test_modelify_with_artifact_path_arg(monkeypatch, kp_for_modelify):
],
catch_exceptions=True,
)
runs_id_set_after_cmd = set(
[
run.info.run_id
for run in context.mlflow.server._mlflow_client.search_runs(
context.mlflow.tracking.experiment._experiment.experiment_id
)
]
)
runs_id_set_after_cmd = {
run.info.run_id
for run in context.mlflow.server._mlflow_client.search_runs(
context.mlflow.tracking.experiment._experiment.experiment_id
)
}

new_run_id = runs_id_set_after_cmd - runs_id_set_before_cmd

Expand Down Expand Up @@ -356,14 +353,12 @@ def test_modelify_with_infer_signature_arg(
catalog.save("trained_model", 2)
catalog.save("my_input_data", my_input_data)

runs_id_set_before_cmd = set(
[
run.info.run_id
for run in context.mlflow.server._mlflow_client.search_runs(
context.mlflow.tracking.experiment._experiment.experiment_id
)
]
)
runs_id_set_before_cmd = {
run.info.run_id
for run in context.mlflow.server._mlflow_client.search_runs(
context.mlflow.tracking.experiment._experiment.experiment_id
)
}

result = cli_runner.invoke(
cli_modelify,
Expand All @@ -379,14 +374,12 @@ def test_modelify_with_infer_signature_arg(

assert result.exit_code == 0

runs_id_set_after_cmd = set(
[
run.info.run_id
for run in context.mlflow.server._mlflow_client.search_runs(
context.mlflow.tracking.experiment._experiment.experiment_id
)
]
)
runs_id_set_after_cmd = {
run.info.run_id
for run in context.mlflow.server._mlflow_client.search_runs(
context.mlflow.tracking.experiment._experiment.experiment_id
)
}

new_run_id = list(runs_id_set_after_cmd - runs_id_set_before_cmd)[0]

Expand Down Expand Up @@ -418,14 +411,12 @@ def test_modelify_with_infer_input_example(
catalog.save("trained_model", 2)
catalog.save("my_input_data", my_input_data)

runs_id_set_before_cmd = set(
[
run.info.run_id
for run in context.mlflow.server._mlflow_client.search_runs(
context.mlflow.tracking.experiment._experiment.experiment_id
)
]
)
runs_id_set_before_cmd = {
run.info.run_id
for run in context.mlflow.server._mlflow_client.search_runs(
context.mlflow.tracking.experiment._experiment.experiment_id
)
}

cmd = [
"--pipeline",
Expand All @@ -445,14 +436,12 @@ def test_modelify_with_infer_input_example(

assert result.exit_code == 0

runs_id_set_after_cmd = set(
[
run.info.run_id
for run in context.mlflow.server._mlflow_client.search_runs(
context.mlflow.tracking.experiment._experiment.experiment_id
)
]
)
runs_id_set_after_cmd = {
run.info.run_id
for run in context.mlflow.server._mlflow_client.search_runs(
context.mlflow.tracking.experiment._experiment.experiment_id
)
}

new_run_id = list(runs_id_set_after_cmd - runs_id_set_before_cmd)[0]

Expand All @@ -463,3 +452,163 @@ def test_modelify_with_infer_input_example(
"pandas_orient": "split",
"type": "dataframe",
}


# 3 checks: success with pip requirements, fail with pip_requirements and conda_env, success with no conda_env
def test_modelify_with_pip_requirements(monkeypatch, kp_for_modelify):

monkeypatch.chdir(kp_for_modelify)

bootstrap_project(Path().cwd())
with KedroSession.create(project_path=Path().cwd()) as session:
context = session.load_context()
catalog = context.catalog
catalog.save("trained_model", 2)

runs_list_before_cmd = context.mlflow.server._mlflow_client.search_runs(
context.mlflow.tracking.experiment._experiment.experiment_id
)
cli_runner = CliRunner()

result = cli_runner.invoke(
cli_modelify,
[
"--pipeline",
"inference",
"--input-name",
"my_input_data",
"--pip-requirements",
"./src/requirements.txt",
],
catch_exceptions=True,
)

runs_list_after_cmd = context.mlflow.server._mlflow_client.search_runs(
context.mlflow.tracking.experiment._experiment.experiment_id
)

assert result.exit_code == 0

# check if there is a single new run
run_as_set = set(runs_list_after_cmd) - set(runs_list_before_cmd)
assert len(run_as_set) == 1
model_run_id = list(run_as_set)[0].info.run_id

# retrieve the requirements from the run
requirements_filepath = mlflow.pyfunc.get_model_dependencies(
f"runs:/{model_run_id}/model", format="pip"
)
assert Path(requirements_filepath).parts[-4:] == (
model_run_id,
"artifacts",
"model",
"requirements.txt",
)

with open(requirements_filepath) as fhandler:
assert r"kedro" in fhandler.read()


def test_modelify_with_default_conda_env(monkeypatch, kp_for_modelify):

monkeypatch.chdir(kp_for_modelify)

bootstrap_project(Path().cwd())
with KedroSession.create(project_path=Path().cwd()) as session:
context = session.load_context()
catalog = context.catalog
catalog.save("trained_model", 2)

runs_list_before_cmd = context.mlflow.server._mlflow_client.search_runs(
context.mlflow.tracking.experiment._experiment.experiment_id
)
cli_runner = CliRunner()

result = cli_runner.invoke(
cli_modelify,
[
"--pipeline",
"inference",
"--input-name",
"my_input_data",
],
catch_exceptions=True,
)

runs_list_after_cmd = context.mlflow.server._mlflow_client.search_runs(
context.mlflow.tracking.experiment._experiment.experiment_id
)

assert result.exit_code == 0

# check if there is a single new run
run_as_set = set(runs_list_after_cmd) - set(runs_list_before_cmd)
assert len(run_as_set) == 1
model_run_id = list(run_as_set)[0].info.run_id

# retrieve the requirements from the run
conda_filepath = mlflow.pyfunc.get_model_dependencies(
f"runs:/{model_run_id}/model", format="conda"
)

assert Path(conda_filepath).parts[-4:] == (
model_run_id,
"artifacts",
"model",
"conda.yaml",
)

with open(conda_filepath) as fhandler:
conda_env = fhandler.read()
assert f"kedro=={kedro_version}" in conda_env
assert f"python: {python_version()}" in conda_env


@pytest.mark.parametrize(
"dependencies_args",
[
["--conda-env", "xxx", "--pip-requirements", "xxx"],
["--conda-env", "xxx", "--extra-pip-requirements", "xxx"],
["--pip-requirements", "xxx", "--extra-pip-requirements", "xxx"],
[
"--conda-env",
"xxx",
"--pip-requirements",
"xxx",
"--extra-pip-requirements",
"xxx",
],
],
)
def test_modelify_fail_with_multiple_requirements(
monkeypatch, kp_for_modelify, dependencies_args
):

monkeypatch.chdir(kp_for_modelify)

bootstrap_project(Path().cwd())
with KedroSession.create(project_path=Path().cwd()) as session:
context = session.load_context()
catalog = context.catalog
catalog.save("trained_model", 2)

cli_runner = CliRunner()

cli_args = [
"--pipeline",
"inference",
"--input-name",
"my_input_data",
] + dependencies_args

result = cli_runner.invoke(
cli_modelify,
cli_args,
catch_exceptions=True,
)

assert result.exit_code == 1
assert (
"Only one of `conda_env`, `pip_requirements`, and `extra_pip_requirements` can be specified"
in str(result.exception)
)

0 comments on commit 5be41ef

Please sign in to comment.