Merge pull request #6924 from nikelite/master
Deprecate KFP v1 SDK support
nikelite authored Oct 9, 2024
2 parents 1a07f18 + 12f2864 commit 79f378e
Showing 35 changed files with 113 additions and 3,945 deletions.
1 change: 0 additions & 1 deletion build/BUILD
@@ -25,7 +25,6 @@ sh_binary(
         "//tfx/extensions/experimental/kfp_compatibility/proto:kfp_component_spec_pb2.py",
         "//tfx/extensions/google_cloud_big_query/experimental/elwc_example_gen/proto:elwc_config_pb2.py",
         "//tfx/orchestration/experimental/core:component_generated_alert_pb2.py",
-        "//tfx/orchestration/kubeflow/proto:kubeflow_pb2.py",
         "//tfx/proto:bulk_inferrer_pb2.py",
         "//tfx/proto:distribution_validator_pb2.py",
         "//tfx/proto:evaluator_pb2.py",
9 changes: 6 additions & 3 deletions pyproject.toml
@@ -37,10 +37,13 @@ Homepage = "https://www.tensorflow.org/tfx"
 Repository = "https://github.com/tensorflow/tfx"

 [tool.pytest.ini_options]
-addopts = "--verbose -m 'not end_to_end'"
+addopts = "--import-mode=importlib"
 testpaths = "tfx"
 python_files = "*_test.py"
 norecursedirs = ["custom_components", ".*", "*.egg"]
 markers = [
-    "end_to_end: end-to-end tests which are slow and require more dependencies (deselect with '-m \"not end_to_end\"')",
-    "serial: mark tests that should not run in parallel"
+    "e2e: end-to-end tests which are slow and require more dependencies (deselect with '-m \"not end_to_end\"')",
+    "serial: mark tests that should not run in parallel",
+    "integration: integration tests that are slow and require more dependencies (deselect with `-m 'not integration'`)",
+    "perf: performance 'perf' tests that are slow and require more dependencies (deselect with `-m 'not perf'`)",
 ]
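
With the markers now declared in pyproject.toml, slow suites are deselected at collection time rather than via the old `addopts` default. A minimal sketch of how a test would opt into one of these markers (the test name and body are illustrative, not part of this commit):

```python
import pytest


@pytest.mark.e2e
def test_pipeline_end_to_end():
  # Selected with `pytest -m e2e`; skipped with `pytest -m "not e2e"`.
  ...
```

Marker expressions combine as expected, e.g. `pytest -m "not e2e and not integration and not perf"` runs only the fast unit tests.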
10 changes: 0 additions & 10 deletions pytest.ini

This file was deleted.

4 changes: 2 additions & 2 deletions test_constraints.txt
@@ -12,5 +12,5 @@
 Flask-session<0.6.0

 #TODO(b/329181965): Remove once we migrate TFX to 2.16.
-tensorflow<2.16
-tensorflow-text<2.16
+tensorflow==2.15.1
+tensorflow-text==2.15.0
18 changes: 7 additions & 11 deletions tfx/dependencies.py
@@ -71,10 +71,8 @@ def make_pipeline_sdk_required_install_packages():
       "google-api-python-client>=1.8,<2",
       # TODO(b/176812386): Deprecate usage of jinja2 for placeholders.
       "jinja2>=2.7.3,<4",
-      # typing-extensions allows consistent & future-proof interface for typing.
-      # Since kfp<2 uses typing-extensions<4, lower bound is the latest 3.x, and
-      # upper bound is <5 as the semver started from 4.0 according to their doc.
-      "typing-extensions>=3.10.0.2,<5",
+      # Upper bound is <5 as the semver started from 4.0 according to their doc.
+      "typing-extensions<5",
   ]


@@ -90,7 +88,7 @@ def make_required_install_packages():
       "google-cloud-bigquery>=3,<4",
       "grpcio>=1.28.1,<2",
       "keras-tuner>=1.0.4,<2,!=1.4.0,!=1.4.1",
-      "kubernetes>=10.0.1,<13",
+      "kubernetes>=10.0.1,<27",
       "numpy>=1.16,<2",
       "pyarrow>=10,<11",
       # TODO: b/358471141 - Orjson 3.10.7 breaks TFX OSS tests.

@@ -148,9 +146,8 @@ def make_extra_packages_airflow():
 def make_extra_packages_kfp():
   """Prepare extra packages needed for Kubeflow Pipelines orchestrator."""
   return [
-      # TODO(b/304892416): Migrate from KFP SDK v1 to v2.
-      "kfp>=1.8.14,<2",
-      "kfp-pipeline-spec>0.1.13,<0.2",
+      "kfp>=2",
+      "kfp-pipeline-spec>=0.2.2",
   ]


@@ -171,9 +168,8 @@ def make_extra_packages_test():
 def make_extra_packages_docker_image():
   # Packages needed for tfx docker image.
   return [
-      # TODO(b/304892416): Migrate from KFP SDK v1 to v2.
-      "kfp>=1.8.14,<2",
-      "kfp-pipeline-spec>0.1.13,<0.2",
+      "kfp>=2",
+      "kfp-pipeline-spec>=0.2.2",
       "mmh>=2.2,<3",
       "python-snappy>=0.5,<0.6",
       # Required for tfx/examples/penguin/penguin_utils_cloud_tuner.py
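
These list-builders are the single source of truth for TFX's pins, consumed by the packaging layer when wheels are built. A simplified sketch of that pattern, assuming a conventional setuptools entry point (the real TFX build wiring is not shown in this diff and differs in detail):

```python
# Sketch only: how dependencies.py-style helpers typically feed setuptools
# metadata. Names besides `tfx.dependencies` and its functions are assumptions.
from setuptools import setup

from tfx import dependencies

setup(
    name="tfx",
    install_requires=dependencies.make_required_install_packages(),
    extras_require={
        # After this change, `pip install "tfx[kfp]"` resolves to the KFP v2 SDK.
        "kfp": dependencies.make_extra_packages_kfp(),
    },
)
```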
@@ -30,7 +30,7 @@ def setUp(self):
     self._experimental_root = os.path.dirname(__file__)
     self._penguin_root = os.path.dirname(self._experimental_root)

-    self._pipeline_name = 'sklearn_test'
+    self._pipeline_name = 'sklearn-test'
     self._data_root = os.path.join(self._penguin_root, 'data')
     self._trainer_module_file = os.path.join(
         self._experimental_root, 'penguin_utils_sklearn.py')

@@ -66,6 +66,8 @@ def testPipelineConstruction(self, resolve_mock):
         beam_pipeline_args=[])
     self.assertEqual(8, len(logical_pipeline.components))

-    tfx.orchestration.experimental.KubeflowDagRunner().run(logical_pipeline)
-    file_path = os.path.join(self.tmp_dir, 'sklearn_test.tar.gz')
+    tfx.orchestration.experimental.KubeflowV2DagRunner(
+        config=tfx.orchestration.experimental.KubeflowV2DagRunnerConfig(),
+        output_filename='sklearn_test.yaml').run(logical_pipeline)
+    file_path = os.path.join(self.tmp_dir, 'sklearn_test.yaml')
     self.assertTrue(tfx.dsl.io.fileio.exists(file_path))
48 changes: 21 additions & 27 deletions tfx/examples/penguin/penguin_pipeline_kubeflow.py
@@ -501,33 +501,27 @@ def main():
   else:
     beam_pipeline_args = _beam_pipeline_args_by_runner['DirectRunner']

-  if use_vertex:
-    dag_runner = tfx.orchestration.experimental.KubeflowV2DagRunner(
-        config=tfx.orchestration.experimental.KubeflowV2DagRunnerConfig(),
-        output_filename=_pipeline_definition_file)
-  else:
-    dag_runner = tfx.orchestration.experimental.KubeflowDagRunner(
-        config=tfx.orchestration.experimental.KubeflowDagRunnerConfig(
-            kubeflow_metadata_config=tfx.orchestration.experimental
-            .get_default_kubeflow_metadata_config()))
-
-  dag_runner.run(
-      create_pipeline(
-          pipeline_name=_pipeline_name,
-          pipeline_root=_pipeline_root,
-          data_root=_data_root,
-          module_file=_module_file,
-          enable_tuning=False,
-          enable_cache=True,
-          user_provided_schema_path=_user_provided_schema,
-          ai_platform_training_args=_ai_platform_training_args,
-          ai_platform_serving_args=_ai_platform_serving_args,
-          beam_pipeline_args=beam_pipeline_args,
-          use_cloud_component=use_cloud_component,
-          use_aip=use_aip,
-          use_vertex=use_vertex,
-          serving_model_dir=_serving_model_dir,
-      ))
+  dag_runner = tfx.orchestration.experimental.KubeflowV2DagRunner(
+      config=tfx.orchestration.experimental.KubeflowV2DagRunnerConfig(),
+      output_filename=_pipeline_definition_file)
+
+  dag_runner.run(
+      create_pipeline(
+          pipeline_name=_pipeline_name,
+          pipeline_root=_pipeline_root,
+          data_root=_data_root,
+          module_file=_module_file,
+          enable_tuning=False,
+          enable_cache=True,
+          user_provided_schema_path=_user_provided_schema,
+          ai_platform_training_args=_ai_platform_training_args,
+          ai_platform_serving_args=_ai_platform_serving_args,
+          beam_pipeline_args=beam_pipeline_args,
+          use_cloud_component=use_cloud_component,
+          use_aip=use_aip,
+          use_vertex=use_vertex,
+          serving_model_dir=_serving_model_dir,
+      ))


 # To compile the pipeline:
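
Unlike the removed KubeflowDagRunner, which compiled a tar.gz for a KFP v1 endpoint, the V2 runner only emits a pipeline definition file. A hypothetical follow-up step, submitting that definition to Vertex AI Pipelines with the google-cloud-aiplatform SDK (the project, bucket, and file names are placeholders, not from this commit):

```python
from google.cloud import aiplatform

aiplatform.init(project="my-gcp-project", location="us-central1")  # placeholder values

job = aiplatform.PipelineJob(
    display_name="penguin-kubeflow",
    # The definition written by KubeflowV2DagRunner (_pipeline_definition_file).
    template_path="penguin_pipeline_kubeflow.json",
    pipeline_root="gs://my-bucket/pipeline_root",  # placeholder bucket
)
job.run()  # blocks until the run finishes; job.submit() returns immediately
```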
51 changes: 0 additions & 51 deletions tfx/examples/penguin/penguin_pipeline_kubeflow_e2e_test.py
@@ -18,7 +18,6 @@
 from absl.testing import parameterized
 from tfx.dsl.io import fileio
 from tfx.examples.penguin import penguin_pipeline_kubeflow
-from tfx.orchestration.kubeflow import test_utils as kubeflow_test_utils
 from tfx.orchestration.kubeflow.v2.e2e_tests import base_test_case
 from tfx.utils import io_utils

@@ -80,53 +79,3 @@ def testEndToEndPipelineRun(self, use_pipeline_spec_2_1):
         use_pipeline_spec_2_1=use_pipeline_spec_2_1,
     )
     self.assertTrue(fileio.exists(self._serving_model_dir))
-
-@pytest.mark.e2e
-class PenguinPipelineKubeflowTest(kubeflow_test_utils.BaseKubeflowTest):
-
-  def setUp(self):
-    super().setUp()
-    penguin_examples_dir = os.path.join(self._REPO_BASE, 'tfx', 'examples',
-                                        'penguin')
-    penguin_test_data_root = os.path.join(penguin_examples_dir, 'data')
-    penguin_test_schema_file = os.path.join(penguin_examples_dir, 'schema',
-                                            'user_provided', 'schema.pbtxt')
-    self._penguin_module_file = os.path.join(penguin_examples_dir,
-                                             'penguin_utils_cloud_tuner.py')
-    self._penguin_data_root = os.path.join(self._test_data_dir, 'data')
-    self._penguin_schema_file = os.path.join(self._test_data_dir,
-                                             'schema.pbtxt')
-
-    io_utils.copy_dir(penguin_test_data_root, self._penguin_data_root)
-    io_utils.copy_file(
-        penguin_test_schema_file, self._penguin_schema_file, overwrite=True)
-
-  def testEndToEndPipelineRun(self):
-    """End-to-end test for pipeline with RuntimeParameter."""
-    pipeline_name = 'kubeflow-v1-e2e-test-{}'.format(self._test_id)
-    kubeflow_pipeline = penguin_pipeline_kubeflow.create_pipeline(
-        pipeline_name=pipeline_name,
-        pipeline_root=self._pipeline_root(pipeline_name),
-        data_root=self._penguin_data_root,
-        module_file=self._penguin_module_file,
-        enable_tuning=False,
-        enable_cache=True,
-        user_provided_schema_path=self._penguin_schema_file,
-        ai_platform_training_args=penguin_pipeline_kubeflow
-        ._ai_platform_training_args,
-        ai_platform_serving_args=penguin_pipeline_kubeflow
-        ._ai_platform_serving_args,
-        beam_pipeline_args=penguin_pipeline_kubeflow
-        ._beam_pipeline_args_by_runner['DirectRunner'],
-        use_cloud_component=False,
-        use_aip=False,
-        use_vertex=False,
-        serving_model_dir=self._serving_model_dir)
-
-    parameters = {
-        'train-args': '{"num_steps": 100}',
-        'eval-args': '{"num_steps": 50}',
-    }
-    self._compile_and_run_pipeline(
-        pipeline=kubeflow_pipeline, parameters=parameters)
-    self.assertTrue(fileio.exists(self._serving_model_dir))
25 changes: 8 additions & 17 deletions tfx/examples/penguin/penguin_pipeline_kubeflow_test.py
@@ -63,20 +63,11 @@ def testPenguinPipelineConstructionAndDefinitionFileExists(
         serving_model_dir=penguin_pipeline_kubeflow._serving_model_dir)
     self.assertLen(kubeflow_pipeline.components, 9)

-    if use_vertex:
-      v2_dag_runner = orchestration.experimental.KubeflowV2DagRunner(
-          config=orchestration.experimental.KubeflowV2DagRunnerConfig(),
-          output_dir=self.tmp_dir,
-          output_filename=penguin_pipeline_kubeflow._pipeline_definition_file)
-      v2_dag_runner.run(kubeflow_pipeline)
-      file_path = os.path.join(
-          self.tmp_dir, penguin_pipeline_kubeflow._pipeline_definition_file)
-      self.assertTrue(fileio.exists(file_path))
-    else:
-      v1_dag_runner = orchestration.experimental.KubeflowDagRunner(
-          config=orchestration.experimental.KubeflowDagRunnerConfig(
-              kubeflow_metadata_config=orchestration.experimental
-              .get_default_kubeflow_metadata_config()))
-      v1_dag_runner.run(kubeflow_pipeline)
-      file_path = os.path.join(self.tmp_dir, 'penguin-kubeflow.tar.gz')
-      self.assertTrue(fileio.exists(file_path))
+    v2_dag_runner = orchestration.experimental.KubeflowV2DagRunner(
+        config=orchestration.experimental.KubeflowV2DagRunnerConfig(),
+        output_dir=self.tmp_dir,
+        output_filename=penguin_pipeline_kubeflow._pipeline_definition_file)
+    v2_dag_runner.run(kubeflow_pipeline)
+    file_path = os.path.join(
+        self.tmp_dir, penguin_pipeline_kubeflow._pipeline_definition_file)
+    self.assertTrue(fileio.exists(file_path))