From cfab8a4c487e9eb02de1ee6b38de0621f1f33f08 Mon Sep 17 00:00:00 2001 From: Doojin Park Date: Thu, 5 Dec 2024 07:40:05 +0900 Subject: [PATCH 1/4] Fix: Vertex ML pipeline test failures (#7727) * Update wheel build to include data files for Vertex tests * Fix Keras Tuner compatibility issue with the example Keras model * Update the taxi template model to avoid using deprecated Keras APIs * Restore exit handlers that were removed with KFP v1 support --- package_build/initialize.sh | 1 + .../chicago_taxi_pipeline/taxi_utils.py | 3 +- .../taxi/models/keras_model/model.py | 2 +- tfx/orchestration/kubeflow/v2/test_utils.py | 37 ++++++++++--------- .../expected_full_taxi_pipeline_job.json | 2 +- .../expected_full_taxi_pipeline_job.json | 2 +- tfx/tools/docker/Dockerfile | 3 +- tfx/v1/orchestration/experimental/__init__.py | 6 +-- 8 files changed, 29 insertions(+), 27 deletions(-) diff --git a/package_build/initialize.sh b/package_build/initialize.sh index 4b8dc7c0a4..03f6f2080a 100755 --- a/package_build/initialize.sh +++ b/package_build/initialize.sh @@ -27,6 +27,7 @@ do ln -sf $BASEDIR/setup.py $BASEDIR/package_build/$CONFIG_NAME/ ln -sf $BASEDIR/dist $BASEDIR/package_build/$CONFIG_NAME/ ln -sf $BASEDIR/tfx $BASEDIR/package_build/$CONFIG_NAME/ + ln -sf $BASEDIR/MANIFEST.in $BASEDIR/package_build/$CONFIG_NAME/ ln -sf $BASEDIR/README*.md $BASEDIR/package_build/$CONFIG_NAME/ ln -sf $BASEDIR/LICENSE $BASEDIR/package_build/$CONFIG_NAME/ diff --git a/tfx/examples/chicago_taxi_pipeline/taxi_utils.py b/tfx/examples/chicago_taxi_pipeline/taxi_utils.py index 42ee24ce23..214aa29de9 100644 --- a/tfx/examples/chicago_taxi_pipeline/taxi_utils.py +++ b/tfx/examples/chicago_taxi_pipeline/taxi_utils.py @@ -246,7 +246,6 @@ def _build_keras_model( output = tf.keras.layers.Dense(1, activation='sigmoid')( tf.keras.layers.concatenate([deep, wide]) ) - output = tf.squeeze(output, -1) model = tf.keras.Model(input_layers, output) model.compile( @@ -371,4 +370,4 @@ def run_fn(fn_args: 
fn_args_utils.FnArgs): model, tf_transform_output ), } - model.save(fn_args.serving_model_dir, save_format='tf', signatures=signatures) + tf.saved_model.save(model, fn_args.serving_model_dir, signatures=signatures) diff --git a/tfx/experimental/templates/taxi/models/keras_model/model.py b/tfx/experimental/templates/taxi/models/keras_model/model.py index 9cad95aed8..19611bf92a 100644 --- a/tfx/experimental/templates/taxi/models/keras_model/model.py +++ b/tfx/experimental/templates/taxi/models/keras_model/model.py @@ -215,4 +215,4 @@ def run_fn(fn_args): 'transform_features': _get_transform_features_signature(model, tf_transform_output), } - model.save(fn_args.serving_model_dir, save_format='tf', signatures=signatures) + tf.saved_model.save(model, fn_args.serving_model_dir, signatures=signatures) diff --git a/tfx/orchestration/kubeflow/v2/test_utils.py b/tfx/orchestration/kubeflow/v2/test_utils.py index 6491e73317..98cc73105f 100644 --- a/tfx/orchestration/kubeflow/v2/test_utils.py +++ b/tfx/orchestration/kubeflow/v2/test_utils.py @@ -234,25 +234,28 @@ def create_pipeline_components( model_blessing=tfx.dsl.Channel( type=tfx.types.standard_artifacts.ModelBlessing)).with_id( 'Resolver.latest_blessed_model_resolver') - # Set the TFMA config for Model Evaluation and Validation. + # Uses TFMA to compute evaluation statistics over features of a model and + # perform quality validation of a candidate model (compared to a baseline). 
eval_config = tfma.EvalConfig( - model_specs=[tfma.ModelSpec(signature_name='eval')], - metrics_specs=[ - tfma.MetricsSpec( - metrics=[tfma.MetricConfig(class_name='ExampleCount')], - thresholds={ - 'binary_accuracy': - tfma.MetricThreshold( - value_threshold=tfma.GenericValueThreshold( - lower_bound={'value': 0.5}), - change_threshold=tfma.GenericChangeThreshold( - direction=tfma.MetricDirection.HIGHER_IS_BETTER, - absolute={'value': -1e-10})) - }) + model_specs=[ + tfma.ModelSpec( + signature_name='serving_default', label_key='tips_xf', + preprocessing_function_names=['transform_features']) ], - slicing_specs=[ - tfma.SlicingSpec(), - tfma.SlicingSpec(feature_keys=['trip_start_hour']) + slicing_specs=[tfma.SlicingSpec()], + metrics_specs=[ + tfma.MetricsSpec(metrics=[ + tfma.MetricConfig( + class_name='BinaryAccuracy', + threshold=tfma.MetricThreshold( + value_threshold=tfma.GenericValueThreshold( + lower_bound={'value': 0.6}), + # Change threshold will be ignored if there is no + # baseline model resolved from MLMD (first run). 
+ change_threshold=tfma.GenericChangeThreshold( + direction=tfma.MetricDirection.HIGHER_IS_BETTER, + absolute={'value': -1e-10}))) + ]) ]) evaluator = tfx.components.Evaluator( examples=example_gen.outputs['examples'], diff --git a/tfx/orchestration/kubeflow/v2/testdata/expected_full_taxi_pipeline_job.json b/tfx/orchestration/kubeflow/v2/testdata/expected_full_taxi_pipeline_job.json index 92db9633ab..fba1cf9072 100644 --- a/tfx/orchestration/kubeflow/v2/testdata/expected_full_taxi_pipeline_job.json +++ b/tfx/orchestration/kubeflow/v2/testdata/expected_full_taxi_pipeline_job.json @@ -706,7 +706,7 @@ "parameters": { "eval_config": { "runtimeValue": { - "constant": "{\n \"metrics_specs\": [\n {\n \"metrics\": [\n {\n \"class_name\": \"ExampleCount\"\n }\n ],\n \"thresholds\": {\n \"binary_accuracy\": {\n \"change_threshold\": {\n \"absolute\": -1e-10,\n \"direction\": \"HIGHER_IS_BETTER\"\n },\n \"value_threshold\": {\n \"lower_bound\": 0.5\n }\n }\n }\n }\n ],\n \"model_specs\": [\n {\n \"signature_name\": \"eval\"\n }\n ],\n \"slicing_specs\": [\n {},\n {\n \"feature_keys\": [\n \"trip_start_hour\"\n ]\n }\n ]\n}" + "constant": "{\n \"metrics_specs\": [\n {\n \"metrics\": [\n {\n \"class_name\": \"BinaryAccuracy\",\n \"threshold\": {\n \"change_threshold\": {\n \"absolute\": -1e-10,\n \"direction\": \"HIGHER_IS_BETTER\"\n },\n \"value_threshold\": {\n \"lower_bound\": 0.6\n }\n }\n }\n ]\n }\n ],\n \"model_specs\": [\n {\n \"label_key\": \"tips_xf\",\n \"preprocessing_function_names\": [\n \"transform_features\"\n ],\n \"signature_name\": \"serving_default\"\n }\n ],\n \"slicing_specs\": [\n {}\n ]\n}" } }, "example_splits": { diff --git a/tfx/orchestration/kubeflow/v2/testdata/legacy/expected_full_taxi_pipeline_job.json b/tfx/orchestration/kubeflow/v2/testdata/legacy/expected_full_taxi_pipeline_job.json index da72f2eb64..8af0c0f92a 100644 --- a/tfx/orchestration/kubeflow/v2/testdata/legacy/expected_full_taxi_pipeline_job.json +++ 
b/tfx/orchestration/kubeflow/v2/testdata/legacy/expected_full_taxi_pipeline_job.json @@ -698,7 +698,7 @@ "eval_config": { "runtimeValue": { "constantValue": { - "stringValue": "{\n \"metrics_specs\": [\n {\n \"metrics\": [\n {\n \"class_name\": \"ExampleCount\"\n }\n ],\n \"thresholds\": {\n \"binary_accuracy\": {\n \"change_threshold\": {\n \"absolute\": -1e-10,\n \"direction\": \"HIGHER_IS_BETTER\"\n },\n \"value_threshold\": {\n \"lower_bound\": 0.5\n }\n }\n }\n }\n ],\n \"model_specs\": [\n {\n \"signature_name\": \"eval\"\n }\n ],\n \"slicing_specs\": [\n {},\n {\n \"feature_keys\": [\n \"trip_start_hour\"\n ]\n }\n ]\n}" + "stringValue": "{\n \"metrics_specs\": [\n {\n \"metrics\": [\n {\n \"class_name\": \"BinaryAccuracy\",\n \"threshold\": {\n \"change_threshold\": {\n \"absolute\": -1e-10,\n \"direction\": \"HIGHER_IS_BETTER\"\n },\n \"value_threshold\": {\n \"lower_bound\": 0.6\n }\n }\n }\n ]\n }\n ],\n \"model_specs\": [\n {\n \"label_key\": \"tips_xf\",\n \"preprocessing_function_names\": [\n \"transform_features\"\n ],\n \"signature_name\": \"serving_default\"\n }\n ],\n \"slicing_specs\": [\n {}\n ]\n}" } } }, diff --git a/tfx/tools/docker/Dockerfile b/tfx/tools/docker/Dockerfile index 4278f4beef..9fa9938175 100644 --- a/tfx/tools/docker/Dockerfile +++ b/tfx/tools/docker/Dockerfile @@ -27,8 +27,7 @@ WORKDIR ${TFX_DIR} ARG TFX_DEPENDENCY_SELECTOR ENV TFX_DEPENDENCY_SELECTOR=${TFX_DEPENDENCY_SELECTOR} -RUN python -m pip install --upgrade pip wheel setuptools -RUN python -m pip install tomli +RUN python -m pip install --upgrade pip wheel setuptools tomli # TODO(b/175089240): clean up conditional checks on whether ml-pipelines-sdk is # built after TFX versions <= 0.25 are no longer eligible for cherry-picks. 
diff --git a/tfx/v1/orchestration/experimental/__init__.py b/tfx/v1/orchestration/experimental/__init__.py index 7da280b36e..43c5c89a31 100644 --- a/tfx/v1/orchestration/experimental/__init__.py +++ b/tfx/v1/orchestration/experimental/__init__.py @@ -14,6 +14,9 @@ """TFX orchestration.experimental module.""" try: + from tfx.orchestration.kubeflow.decorators import exit_handler # pylint: disable=g-import-not-at-top + from tfx.orchestration.kubeflow.decorators import FinalStatusStr # pylint: disable=g-import-not-at-top + from tfx.orchestration.kubeflow.v2.kubeflow_v2_dag_runner import ( KubeflowV2DagRunner, KubeflowV2DagRunnerConfig, @@ -24,11 +27,8 @@ __all__ = [ "FinalStatusStr", - "KubeflowDagRunner", - "KubeflowDagRunnerConfig", "KubeflowV2DagRunner", "KubeflowV2DagRunnerConfig", - "LABEL_KFP_SDK_ENV", "exit_handler", "get_default_kubeflow_metadata_config", ] From 497522932818e3e2ab2d79822e002541ff1d7bb5 Mon Sep 17 00:00:00 2001 From: Madhur Karampudi <142544288+vkarampudi@users.noreply.github.com> Date: Thu, 5 Dec 2024 20:01:43 -0800 Subject: [PATCH 2/4] TFX 1.16.0 Release (#7726) --- README.md | 5 +-- RELEASE.md | 31 ++++++++++++++++++ nightly_test_constraints.txt | 53 +++++++++++-------------------- test_constraints.txt | 46 +++++++++++---------------- tfx/dependencies.py | 22 ++++++------- tfx/tools/docker/requirements.txt | 1 + tfx/version.py | 2 +- tfx/workspace.bzl | 4 +-- 8 files changed, 87 insertions(+), 77 deletions(-) diff --git a/README.md b/README.md index b71d438afc..a8f93058c4 100644 --- a/README.md +++ b/README.md @@ -62,8 +62,9 @@ but other *untested* combinations may also work. 
tfx | Python | apache-beam[gcp] | ml-metadata | pyarrow | tensorflow | tensorflow-data-validation | tensorflow-metadata | tensorflow-model-analysis | tensorflow-serving-api | tensorflow-transform | tfx-bsl ------------------------------------------------------------------------- | -------------------- | ---------------- | ----------- | ------- | ----------------- | -------------------------- | ------------------- | ------------------------- | ---------------------- | -------------------- | ------- -[GitHub master](https://github.com/tensorflow/tfx/blob/master/RELEASE.md) | >=3.9,<3.11 | 2.47.0 | 1.15.0 | 10.0.0 | nightly (2.x) | 1.15.1 | 1.15.0 | 0.46.0 | 2.15.1 | 1.15.0 | 1.15.1 -[1.15.0](https://github.com/tensorflow/tfx/blob/v1.15.0/RELEASE.md) | >=3.9,<3.11 | 2.47.0 | 1.15.0 | 10.0.0 | 2.15 | 1.15.1 | 1.15.0 | 0.46.0 | 2.15.1 | 1.15.0 | 1.15.1 +[GitHub master](https://github.com/tensorflow/tfx/blob/master/RELEASE.md) | >=3.9,<3.11 | 2.59.0 | 1.16.0 | 10.0.1 | nightly (2.x) | 1.16.1 | 1.16.1 | 0.47.0 | 2.16.1 | 1.16.0 | 1.16.1 +[1.16.0](https://github.com/tensorflow/tfx/blob/v1.16.0/RELEASE.md) | >=3.9,<3.11 | 2.59.0 | 1.16.0 | 10.0.1 | 2.16 | 1.16.1 | 1.16.1 | 0.47.0 | 2.16.1 | 1.16.0 | 1.16.1 +[1.15.0](https://github.com/tensorflow/tfx/blob/v1.15.0/RELEASE.md) | >=3.9,<3.11 | 2.47.0 | 1.15.0 | 10.0.0 | 2.15 | 1.15.1 | 1.15.0 | 0.46.0 | 2.15.1 | 1.15.0 | 1.15.1 [1.14.0](https://github.com/tensorflow/tfx/blob/v1.14.0/RELEASE.md) | >=3.8,<3.11 | 2.47.0 | 1.14.0 | 10.0.0 | 2.13 | 1.14.0 | 1.14.0 | 0.45.0 | 2.9.0 | 1.14.0 | 1.14.0 [1.13.0](https://github.com/tensorflow/tfx/blob/v1.13.0/RELEASE.md) | >=3.8,<3.10 | 2.40.0 | 1.13.1 | 6.0.0 | 2.12 | 1.13.0 | 1.13.1 | 0.44.0 | 2.9.0 | 1.13.0 | 1.13.0 [1.12.0](https://github.com/tensorflow/tfx/blob/v1.12.0/RELEASE.md) | >=3.7,<3.10 | 2.40.0 | 1.12.0 | 6.0.0 | 2.11 | 1.12.0 | 1.12.0 | 0.43.0 | 2.9.0 | 1.12.0 | 1.12.0 diff --git a/RELEASE.md b/RELEASE.md index 
fbafb8db13..9f9f03b4af 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -4,6 +4,26 @@ ## Breaking Changes +### For Pipeline Authors + +### For Component Authors + +## Deprecations + +## Bug Fixes and Other Changes + +## Dependency Updates + +## Documentation Updates + +# Version 1.16.0 + +## Major Features and Improvements + +* N/A + +## Breaking Changes + * `Placeholder.__format__()` is now disallowed, so you cannot use placeholders in f-strings and `str.format()` calls anymore. If you get an error from this, most likely you discovered a bug and should not use an f-string in the first @@ -13,12 +33,21 @@ ### For Pipeline Authors +* N/A + ### For Component Authors +* N/A + ## Deprecations +* KubeflowDagRunner (KFP v1 SDK) is deprecated. Use KubeflowV2DagRunner (KFP v2 pipeline spec) instead. +* Since Estimators will no longer be available in TensorFlow 2.16 and later versions, we have deprecated examples and templates that use them. We encourage you to explore Keras as a more modern and flexible high-level API for building and training models in TensorFlow. + ## Bug Fixes and Other Changes +* N/A + ## Dependency Updates | Package Name | Version Constraints | Previously (in `v1.15.1`) | Comments | | -- | -- | -- | -- | @@ -26,6 +55,8 @@ ## Documentation Updates +* N/A + # Version 1.15.1 ## Major Features and Improvements diff --git a/nightly_test_constraints.txt b/nightly_test_constraints.txt index 1055bda932..9bd75cb146 100644 --- a/nightly_test_constraints.txt +++ b/nightly_test_constraints.txt @@ -11,28 +11,17 @@ # TODO(b/321609768): Remove pinned Flask-session version after resolving the issue. Flask-session<0.6.0 -#TODO(b/329181965): Remove once we migrate TFX to 2.16. 
-tensorflow==2.15.1 -tensorflow-text==2.15.0 +tensorflow==2.16.2 +tensorflow-text==2.16.1 +keras==3.6.0 absl-py==1.4.0 aiohappyeyeballs==2.4.3 -aiohttp==3.10.9 aiosignal==1.3.1 alembic==1.13.3 annotated-types==0.7.0 anyio==4.6.0 -apache-airflow==2.10.2 -apache-airflow-providers-common-compat==1.2.1rc1 -apache-airflow-providers-common-io==1.4.2rc1 -apache-airflow-providers-common-sql==1.18.0rc1 -apache-airflow-providers-fab==1.4.1rc1 -apache-airflow-providers-ftp==3.11.1 -apache-airflow-providers-http==4.13.1 -apache-airflow-providers-imap==3.7.0 -apache-airflow-providers-mysql==5.7.2rc1 -apache-airflow-providers-smtp==1.8.0 -apache-airflow-providers-sqlite==3.9.0 +apache-airflow==2.10.3 apache-beam==2.59.0 apispec==6.6.1 argcomplete==3.5.1 @@ -91,7 +80,6 @@ fasteners==0.19 fastjsonschema==2.20.0 filelock==3.16.1 Flask==2.2.5 -Flask-AppBuilder==4.5.0 Flask-Babel==2.0.0 Flask-Caching==2.3.0 Flask-JWT-Extended==4.6.0 @@ -152,7 +140,6 @@ importlib_resources==6.4.5 inflection==0.5.1 iniconfig==2.0.0 ipykernel==6.29.5 -ipython==7.34.0 ipython-genutils==0.2.0 ipywidgets==7.8.4 isoduration==20.11.0 @@ -179,7 +166,7 @@ jupyterlab==4.2.5 jupyterlab_pygments==0.3.0 jupyterlab_server==2.27.3 jupyterlab_widgets==1.1.10 -keras==2.15.0 +tf-keras==2.16.0 keras-tuner==1.4.7 kfp==2.5.0 kfp-pipeline-spec==0.2.2 @@ -205,12 +192,12 @@ mdurl==0.1.2 methodtools==0.4.7 mistune==3.0.2 ml-dtypes==0.3.2 -ml-metadata>=1.17.0.dev20241016 +ml-metadata>=1.16.0 mmh==2.2 more-itertools==10.5.0 msgpack==1.1.0 multidict==6.1.0 -mysql-connector-python==9.0.0 +mysql-connector-python==9.1.0 mysqlclient==2.2.4 nbclient==0.10.0 nbconvert==7.16.4 @@ -262,7 +249,6 @@ proto-plus==1.24.0 protobuf==3.20.3 psutil==6.0.0 ptyprocess==0.7.0 -pyarrow==10.0.1 pyarrow-hotfix==0.6 pyasn1==0.6.1 pyasn1_modules==0.4.1 @@ -316,33 +302,33 @@ SQLAlchemy==1.4.54 SQLAlchemy-JSONField==1.0.2 SQLAlchemy-Utils==0.41.2 sqlparse==0.5.1 -struct2tensor>=0.47.0.dev20240430; extra == "all" +struct2tensor>=0.47.0 tabulate==0.9.0 
tenacity==9.0.0 -tensorboard==2.15.2 +tensorboard==2.16.2 tensorboard-data-server==0.7.2 -tensorflow==2.15.1 +tensorflow==2.16.2 tensorflow-cloud==0.1.16 -tensorflow-data-validation>=1.16.0.dev20240508 +tensorflow-data-validation>=1.16.1 tensorflow-datasets==4.9.3 -tensorflow-decision-forests==1.8.1 +tensorflow-decision-forests==1.9.2 tensorflow-estimator==2.15.0 tensorflow-hub==0.15.0 tensorflow-io==0.24.0 tensorflow-io-gcs-filesystem==0.24.0 -tensorflow-metadata>=1.17.0.dev20241016 -tensorflow-ranking==0.5.5 -tensorflow-serving-api==2.15.1 -tensorflow-text==2.15.0 -tensorflow-transform>=1.16.0.dev20240430 -tensorflow_model_analysis>=0.47.0.dev20240617 +tensorflow-metadata>=1.16.1 +# tensorflow-ranking==0.5.5 +tensorflow-serving-api==2.16.1 +tensorflow-text==2.16.1 +tensorflow-transform>=1.16.0 +tensorflow_model_analysis>=0.47.0 tensorflowjs==4.17.0 tensorstore==0.1.66 termcolor==2.5.0 terminado==0.18.1 text-unidecode==1.3 tflite-support==0.4.4 -tfx-bsl>=1.16.0.dev20240430 +tfx-bsl>=1.16.1 threadpoolctl==3.5.0 time-machine==2.16.0 tinycss2==1.3.0 @@ -367,7 +353,6 @@ wcwidth==0.2.13 webcolors==24.8.0 webencodings==0.5.1 websocket-client==0.59.0 -Werkzeug==2.2.3 widgetsnbextension==3.6.9 wirerope==0.4.7 wrapt==1.14.1 diff --git a/test_constraints.txt b/test_constraints.txt index 34c162df19..0433e34857 100644 --- a/test_constraints.txt +++ b/test_constraints.txt @@ -11,28 +11,17 @@ # TODO(b/321609768): Remove pinned Flask-session version after resolving the issue. Flask-session<0.6.0 -#TODO(b/329181965): Remove once we migrate TFX to 2.16. 
-tensorflow==2.15.1 -tensorflow-text==2.15.0 +tensorflow==2.16.2 +tensorflow-text==2.16.1 +keras==3.6.0 absl-py==1.4.0 aiohappyeyeballs==2.4.3 -aiohttp==3.10.9 aiosignal==1.3.1 alembic==1.13.3 annotated-types==0.7.0 anyio==4.6.0 -apache-airflow==2.10.2 -apache-airflow-providers-common-compat==1.2.1rc1 -apache-airflow-providers-common-io==1.4.2rc1 -apache-airflow-providers-common-sql==1.18.0rc1 -apache-airflow-providers-fab==1.4.1rc1 -apache-airflow-providers-ftp==3.11.1 -apache-airflow-providers-http==4.13.1 -apache-airflow-providers-imap==3.7.0 -apache-airflow-providers-mysql==5.7.2rc1 -apache-airflow-providers-smtp==1.8.0 -apache-airflow-providers-sqlite==3.9.0 +apache-airflow==2.10.3 apache-beam==2.59.0 apispec==6.6.1 argcomplete==3.5.1 @@ -91,7 +80,6 @@ fasteners==0.19 fastjsonschema==2.20.0 filelock==3.16.1 Flask==2.2.5 -Flask-AppBuilder==4.5.0 Flask-Babel==2.0.0 Flask-Caching==2.3.0 Flask-JWT-Extended==4.6.0 @@ -152,7 +140,6 @@ importlib_resources==6.4.5 inflection==0.5.1 iniconfig==2.0.0 ipykernel==6.29.5 -ipython==7.34.0 ipython-genutils==0.2.0 ipywidgets==7.8.4 isoduration==20.11.0 @@ -179,7 +166,7 @@ jupyterlab==4.2.5 jupyterlab_pygments==0.3.0 jupyterlab_server==2.27.3 jupyterlab_widgets==1.1.10 -keras==2.15.0 +tf-keras==2.16.0 keras-tuner==1.4.7 kfp==2.5.0 kfp-pipeline-spec==0.2.2 @@ -205,11 +192,12 @@ mdurl==0.1.2 methodtools==0.4.7 mistune==3.0.2 ml-dtypes==0.3.2 +ml-metadata>=1.16.0 mmh==2.2 more-itertools==10.5.0 msgpack==1.1.0 multidict==6.1.0 -mysql-connector-python==9.0.0 +mysql-connector-python==9.1.0 mysqlclient==2.2.4 nbclient==0.10.0 nbconvert==7.16.4 @@ -261,7 +249,6 @@ proto-plus==1.24.0 protobuf==3.20.3 psutil==6.0.0 ptyprocess==0.7.0 -pyarrow==10.0.1 pyarrow-hotfix==0.6 pyasn1==0.6.1 pyasn1_modules==0.4.1 @@ -315,27 +302,33 @@ SQLAlchemy==1.4.54 SQLAlchemy-JSONField==1.0.2 SQLAlchemy-Utils==0.41.2 sqlparse==0.5.1 +struct2tensor>=0.47.0 tabulate==0.9.0 tenacity==9.0.0 -tensorboard==2.15.2 +tensorboard==2.16.2 tensorboard-data-server==0.7.2 
-tensorflow==2.15.1 +tensorflow==2.16.2 tensorflow-cloud==0.1.16 +tensorflow-data-validation>=1.16.1 tensorflow-datasets==4.9.3 -tensorflow-decision-forests==1.8.1 +tensorflow-decision-forests==1.9.2 tensorflow-estimator==2.15.0 tensorflow-hub==0.15.0 tensorflow-io==0.24.0 tensorflow-io-gcs-filesystem==0.24.0 -tensorflow-ranking==0.5.5 -tensorflow-serving-api==2.15.1 -tensorflow-text==2.15.0 +tensorflow-metadata>=1.16.1 +# tensorflow-ranking==0.5.5 +tensorflow-serving-api==2.16.1 +tensorflow-text==2.16.1 +tensorflow-transform>=1.16.0 +tensorflow_model_analysis>=0.47.0 tensorflowjs==4.17.0 tensorstore==0.1.66 termcolor==2.5.0 terminado==0.18.1 text-unidecode==1.3 tflite-support==0.4.4 +tfx-bsl>=1.16.1 threadpoolctl==3.5.0 time-machine==2.16.0 tinycss2==1.3.0 @@ -360,7 +353,6 @@ wcwidth==0.2.13 webcolors==24.8.0 webencodings==0.5.1 websocket-client==0.59.0 -Werkzeug==2.2.3 widgetsnbextension==3.6.9 wirerope==0.4.7 wrapt==1.14.1 diff --git a/tfx/dependencies.py b/tfx/dependencies.py index 7666dd185a..ca8469aefc 100644 --- a/tfx/dependencies.py +++ b/tfx/dependencies.py @@ -58,9 +58,9 @@ def make_pipeline_sdk_required_install_packages(): "ml-metadata" + select_constraint( # LINT.IfChange - default=">=1.15.0,<1.16.0", + default=">=1.16.0,<1.17.0", # LINT.ThenChange(tfx/workspace.bzl) - nightly=">=1.16.0.dev", + nightly=">=1.17.0.dev", git_master="@git+https://github.com/google/ml-metadata@master", ), "packaging>=22", @@ -105,31 +105,31 @@ def make_required_install_packages(): # Pip might stuck in a TF 1.15 dependency although there is a working # dependency set with TF 2.x without the sync. 
# pylint: disable=line-too-long - "tensorflow" + select_constraint(">=2.15.0,<2.16"), + "tensorflow" + select_constraint(">=2.16.0,<2.17"), # pylint: enable=line-too-long "tensorflow-hub>=0.15.0,<0.16", "tensorflow-data-validation" + select_constraint( - default=">=1.15.1,<1.16.0", - nightly=">=1.16.0.dev", + default=">=1.16.1,<1.17.0", + nightly=">=1.16.1.dev", git_master=("@git+https://github.com/tensorflow/data-validation@master"), ), "tensorflow-model-analysis" + select_constraint( - default=">=0.46.0,<0.47.0", + default=">=0.47.0,<0.48.0", nightly=">=0.47.0.dev", git_master="@git+https://github.com/tensorflow/model-analysis@master", ), - "tensorflow-serving-api>=2.15,<2.16", + "tensorflow-serving-api>=2.16,<2.17", "tensorflow-transform" + select_constraint( - default=">=1.15.0,<1.16.0", + default=">=1.16.0,<1.17.0", nightly=">=1.16.0.dev", git_master="@git+https://github.com/tensorflow/transform@master", ), "tfx-bsl" + select_constraint( - default=">=1.15.1,<1.16.0", + default=">=1.16.1,<1.17.0", nightly=">=1.16.0.dev", git_master="@git+https://github.com/tensorflow/tfx-bsl@master", ), @@ -199,7 +199,7 @@ def make_extra_packages_tf_ranking(): "tensorflow-ranking>=0.5,<0.6", "struct2tensor" + select_constraint( - default=">=0.46.0,<0.47.0", + default=">=0.47.0,<0.48.0", nightly=">=0.47.0.dev", git_master="@git+https://github.com/google/struct2tensor@master", ), @@ -211,7 +211,7 @@ def make_extra_packages_tfdf(): # Required for tfx/examples/penguin/penguin_utils_tfdf_experimental.py return [ # NOTE: TFDF 1.0.1 is only compatible with TF 2.10.x. 
- "tensorflow-decision-forests>=1.0.1,<1.9", + "tensorflow-decision-forests>=1.8.1,<2", ] diff --git a/tfx/tools/docker/requirements.txt b/tfx/tools/docker/requirements.txt index 479f41021e..080c4a941f 100644 --- a/tfx/tools/docker/requirements.txt +++ b/tfx/tools/docker/requirements.txt @@ -158,6 +158,7 @@ jupyterlab_pygments==0.3.0 jupyterlab_server==2.27.3 jupyterlab_widgets==1.1.10 tf-keras==2.16.0 +keras==3.6.0 keras-tuner==1.4.7 kfp==2.5.0 kfp-pipeline-spec==0.2.2 diff --git a/tfx/version.py b/tfx/version.py index 3b49d5f8bf..01a6b18e01 100644 --- a/tfx/version.py +++ b/tfx/version.py @@ -14,4 +14,4 @@ """Contains the version string of TFX.""" # Note that setup.py uses this version. -__version__ = '1.16.0.dev' +__version__ = '1.17.0.dev' diff --git a/tfx/workspace.bzl b/tfx/workspace.bzl index 6a92fad069..19bb617e3d 100644 --- a/tfx/workspace.bzl +++ b/tfx/workspace.bzl @@ -79,7 +79,7 @@ def tfx_workspace(): name = "com_github_google_ml_metadata", repo = "google/ml-metadata", # LINT.IfChange - tag = "v1.15.0", + tag = "v1.16.0", # LINT.ThenChange(//tfx/dependencies.py) ) @@ -89,6 +89,6 @@ def tfx_workspace(): repo = "tensorflow/metadata", # LINT.IfChange # Keep in sync with TFDV version (TFDV requires TFMD). 
- tag = "v1.15.0", + tag = "v1.16.1", # LINT.ThenChange(//tfx/dependencies.py) ) From a4f29a0f9b9e2f1ad112982af9c19f881c629d1b Mon Sep 17 00:00:00 2001 From: Madhur Karampudi <142544288+vkarampudi@users.noreply.github.com> Date: Sun, 8 Dec 2024 21:24:36 -0800 Subject: [PATCH 3/4] Removing tf-ranking as a dependency until it supports tf 2.16 (#7725) --- tfx/examples/ranking/features.py | 35 +- .../ranking/ranking_pipeline_e2e_test.py | 47 +-- tfx/examples/ranking/ranking_utils.py | 314 ++++-------------- .../struct2tensor_parsing_utils_test.py | 164 +++++---- 4 files changed, 195 insertions(+), 365 deletions(-) diff --git a/tfx/examples/ranking/features.py b/tfx/examples/ranking/features.py index e338240750..4863da52d6 100644 --- a/tfx/examples/ranking/features.py +++ b/tfx/examples/ranking/features.py @@ -17,36 +17,37 @@ These names will be shared between the transform and the model. """ -import tensorflow as tf -from tfx.examples.ranking import struct2tensor_parsing_utils +# import tensorflow as tf +# This is due to TF Ranking not supporting TensorFlow 2.16, We should re-enable it when support is added. +# from tfx.examples.ranking import struct2tensor_parsing_utils # Labels are expected to be dense. In case of a batch of ELWCs have different # number of documents, the shape of the label is [N, D], where N is the batch # size, D is the maximum number of documents in the batch. If an ELWC in the # batch has D_0 < D documents, then the value of label at D0 <= d < D must be # negative to indicate that the document is invalid. -LABEL_PADDING_VALUE = -1 +#LABEL_PADDING_VALUE = -1 # Names of features in the ELWC. -QUERY_TOKENS = 'query_tokens' -DOCUMENT_TOKENS = 'document_tokens' -LABEL = 'relevance' +#QUERY_TOKENS = 'query_tokens' +#DOCUMENT_TOKENS = 'document_tokens' +#LABEL = 'relevance' # This "feature" does not exist in the data but will be created on the fly. 
-LIST_SIZE_FEATURE_NAME = 'example_list_size' +# LIST_SIZE_FEATURE_NAME = 'example_list_size' -def get_features(): - """Defines the context features and example features spec for parsing.""" +#def get_features(): +# """Defines the context features and example features spec for parsing.""" - context_features = [ - struct2tensor_parsing_utils.Feature(QUERY_TOKENS, tf.string) - ] + # context_features = [ + # struct2tensor_parsing_utils.Feature(QUERY_TOKENS, tf.string) + # ] - example_features = [ - struct2tensor_parsing_utils.Feature(DOCUMENT_TOKENS, tf.string) - ] +# example_features = [ +# struct2tensor_parsing_utils.Feature(DOCUMENT_TOKENS, tf.string) +# ] - label = struct2tensor_parsing_utils.Feature(LABEL, tf.int64) +# label = struct2tensor_parsing_utils.Feature(LABEL, tf.int64) - return context_features, example_features, label +# return context_features, example_features, label diff --git a/tfx/examples/ranking/ranking_pipeline_e2e_test.py b/tfx/examples/ranking/ranking_pipeline_e2e_test.py index 7d71530f4b..9e953cc688 100644 --- a/tfx/examples/ranking/ranking_pipeline_e2e_test.py +++ b/tfx/examples/ranking/ranking_pipeline_e2e_test.py @@ -16,9 +16,12 @@ import unittest import tensorflow as tf -from tfx.examples.ranking import ranking_pipeline -from tfx.orchestration import metadata -from tfx.orchestration.beam.beam_dag_runner import BeamDagRunner +# from tfx.orchestration import metadata +# from tfx.orchestration.beam.beam_dag_runner import BeamDagRunner + +# This is due to TF Ranking not supporting TensorFlow 2.16, We should re-enable it when support is added. 
+# from tfx.examples.ranking import ranking_pipeline + try: import struct2tensor # pylint: disable=g-import-not-at-top @@ -62,23 +65,23 @@ def assertExecutedOnce(self, component) -> None: execution = tf.io.gfile.listdir(os.path.join(component_path, output)) self.assertEqual(1, len(execution)) - def testPipeline(self): - BeamDagRunner().run( - ranking_pipeline._create_pipeline( - pipeline_name=self._pipeline_name, - pipeline_root=self._tfx_root, - data_root=self._data_root, - module_file=self._module_file, - serving_model_dir=self._serving_model_dir, - metadata_path=self._metadata_path, - beam_pipeline_args=['--direct_num_workers=1'])) - self.assertTrue(tf.io.gfile.exists(self._serving_model_dir)) - self.assertTrue(tf.io.gfile.exists(self._metadata_path)) + #def testPipeline(self): + # BeamDagRunner().run( + # ranking_pipeline._create_pipeline( + # pipeline_name=self._pipeline_name, + # pipeline_root=self._tfx_root, + # data_root=self._data_root, + # module_file=self._module_file, + # serving_model_dir=self._serving_model_dir, + # metadata_path=self._metadata_path, + # beam_pipeline_args=['--direct_num_workers=1'])) + # self.assertTrue(tf.io.gfile.exists(self._serving_model_dir)) + # self.assertTrue(tf.io.gfile.exists(self._metadata_path)) - metadata_config = metadata.sqlite_metadata_connection_config( - self._metadata_path) - with metadata.Metadata(metadata_config) as m: - artifact_count = len(m.store.get_artifacts()) - execution_count = len(m.store.get_executions()) - self.assertGreaterEqual(artifact_count, execution_count) - self.assertEqual(9, execution_count) + # metadata_config = metadata.sqlite_metadata_connection_config( + # self._metadata_path) + # with metadata.Metadata(metadata_config) as m: + # artifact_count = len(m.store.get_artifacts()) + # execution_count = len(m.store.get_executions()) + # self.assertGreaterEqual(artifact_count, execution_count) + # self.assertEqual(9, execution_count) diff --git a/tfx/examples/ranking/ranking_utils.py 
b/tfx/examples/ranking/ranking_utils.py index 7312bed837..9e953cc688 100644 --- a/tfx/examples/ranking/ranking_utils.py +++ b/tfx/examples/ranking/ranking_utils.py @@ -11,247 +11,77 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Module file.""" +"""Tests for tfx.examples.ranking.ranking_pipeline.""" +import os +import unittest import tensorflow as tf -import tensorflow_ranking as tfr -import tensorflow_transform as tft -from tfx.examples.ranking import features -from tfx.examples.ranking import struct2tensor_parsing_utils -from tfx_bsl.public import tfxio - - -def make_decoder(): - """Creates a data decoder that that decodes ELWC records to tensors. - - A DataView (see "TfGraphDataViewProvider" component in the pipeline) - will refer to this decoder. And any components that consumes the data - with the DataView applied will use this decoder. - - Returns: - A ELWC decoder. - """ - context_features, example_features, label_feature = features.get_features() - - return struct2tensor_parsing_utils.ELWCDecoder( - name='ELWCDecoder', - context_features=context_features, - example_features=example_features, - size_feature_name=features.LIST_SIZE_FEATURE_NAME, - label_feature=label_feature) - - -def preprocessing_fn(inputs): - """Transform preprocessing_fn.""" - - # generate a shared vocabulary. 
- _ = tft.vocabulary( - tf.concat([ - inputs[features.QUERY_TOKENS].flat_values, - inputs[features.DOCUMENT_TOKENS].flat_values - ], - axis=0), - vocab_filename='shared_vocab') - return inputs - - -def run_fn(trainer_fn_args): - """TFX trainer entry point.""" - - tf_transform_output = tft.TFTransformOutput(trainer_fn_args.transform_output) - hparams = dict( - batch_size=32, - embedding_dimension=20, - learning_rate=0.05, - dropout_rate=0.8, - hidden_layer_dims=[64, 32, 16], - loss='approx_ndcg_loss', - use_batch_norm=True, - batch_norm_moment=0.99 - ) - - train_dataset = _input_fn(trainer_fn_args.train_files, - trainer_fn_args.data_accessor, - hparams['batch_size']) - eval_dataset = _input_fn(trainer_fn_args.eval_files, - trainer_fn_args.data_accessor, - hparams['batch_size']) - - model = _create_ranking_model(tf_transform_output, hparams) - model.summary() - log_dir = trainer_fn_args.model_run_dir - # Write logs to path - tensorboard_callback = tf.keras.callbacks.TensorBoard( - log_dir=log_dir, update_freq='epoch') - model.fit( - train_dataset, - steps_per_epoch=trainer_fn_args.train_steps, - validation_data=eval_dataset, - validation_steps=trainer_fn_args.eval_steps, - callbacks=[tensorboard_callback]) - - # TODO(zhuo): Add support for Regress signature. 
- @tf.function(input_signature=[tf.TensorSpec([None], tf.string)], - autograph=False) - def predict_serving_fn(serialized_elwc_records): - decode_fn = trainer_fn_args.data_accessor.data_view_decode_fn - decoded = decode_fn(serialized_elwc_records) - decoded.pop(features.LABEL) - return {tf.saved_model.PREDICT_OUTPUTS: model(decoded)} - - model.save( - trainer_fn_args.serving_model_dir, - save_format='tf', - signatures={ - 'serving_default': - predict_serving_fn.get_concrete_function(), - }) - - -def _input_fn(file_patterns, - data_accessor, - batch_size) -> tf.data.Dataset: - """Returns a dataset of decoded tensors.""" - - def prepare_label(parsed_ragged_tensors): - label = parsed_ragged_tensors.pop(features.LABEL) - # Convert labels to a dense tensor. - label = label.to_tensor(default_value=features.LABEL_PADDING_VALUE) - return parsed_ragged_tensors, label - - # NOTE: this dataset already contains RaggedTensors from the Decoder. - dataset = data_accessor.tf_dataset_factory( - file_patterns, - tfxio.TensorFlowDatasetOptions(batch_size=batch_size), - schema=None) - return dataset.map(prepare_label).repeat() - - -def _preprocess_keras_inputs(context_keras_inputs, example_keras_inputs, - tf_transform_output, hparams): - """Preprocesses the inputs, including vocab lookup and embedding.""" - lookup_layer = tf.keras.layers.experimental.preprocessing.StringLookup( - max_tokens=( - tf_transform_output.vocabulary_size_by_name('shared_vocab') + 1), - vocabulary=tf_transform_output.vocabulary_file_by_name('shared_vocab'), - num_oov_indices=1, - oov_token='[UNK#]', - mask_token=None) - embedding_layer = tf.keras.layers.Embedding( - input_dim=( - tf_transform_output.vocabulary_size_by_name('shared_vocab') + 1), - output_dim=hparams['embedding_dimension'], - embeddings_initializer=None, - embeddings_constraint=None) - def embedding(input_tensor): - # TODO(b/158673891): Support weighted features. 
- embedded_tensor = embedding_layer(lookup_layer(input_tensor)) - mean_embedding = tf.reduce_mean(embedded_tensor, axis=-2) - # mean_embedding could be a dense tensor (context feature) or a ragged - # tensor (example feature). if it's ragged, we densify it first. - if isinstance(mean_embedding.type_spec, tf.RaggedTensorSpec): - return struct2tensor_parsing_utils.make_ragged_densify_layer()( - mean_embedding) - return mean_embedding - preprocessed_context_features, preprocessed_example_features = {}, {} - context_features, example_features, _ = features.get_features() - for feature in context_features: - preprocessed_context_features[feature.name] = embedding( - context_keras_inputs[feature.name]) - for feature in example_features: - preprocessed_example_features[feature.name] = embedding( - example_keras_inputs[feature.name]) - list_size = struct2tensor_parsing_utils.make_ragged_densify_layer()( - context_keras_inputs[features.LIST_SIZE_FEATURE_NAME]) - list_size = tf.reshape(list_size, [-1]) - mask = tf.sequence_mask(list_size) - - return preprocessed_context_features, preprocessed_example_features, mask - - -def _create_ranking_model(tf_transform_output, hparams) -> tf.keras.Model: - """Creates a Keras ranking model.""" - context_feature_specs, example_feature_specs, _ = features.get_features() - context_keras_inputs, example_keras_inputs = ( - struct2tensor_parsing_utils.create_keras_inputs( - context_feature_specs, example_feature_specs, - features.LIST_SIZE_FEATURE_NAME)) - context_features, example_features, mask = _preprocess_keras_inputs( - context_keras_inputs, example_keras_inputs, tf_transform_output, hparams) - - # Since argspec inspection is expensive, for keras layer, - # layer_obj._call_spec.arg_names is a property that uses cached argspec for - # call. We use this to determine whether the layer expects `inputs` as first - # argument. - # TODO(b/185176464): update tfr dependency to remove this branch. 
- flatten_list = tfr.keras.layers.FlattenList() - - # TODO(kathywu): remove the except branch once changes to the call function - # args in the Keras Layer have been released. - try: - first_arg_name = flatten_list._call_spec.arg_names[0] # pylint: disable=protected-access - except AttributeError: - first_arg_name = flatten_list._call_fn_args[0] # pylint: disable=protected-access - if first_arg_name == 'inputs': - (flattened_context_features, flattened_example_features) = flatten_list( - inputs=(context_features, example_features, mask)) - else: - (flattened_context_features, - flattened_example_features) = flatten_list(context_features, - example_features, mask) - - # Concatenate flattened context and example features along `list_size` dim. - context_input = [ - tf.keras.layers.Flatten()(flattened_context_features[name]) - for name in sorted(flattened_context_features) - ] - example_input = [ - tf.keras.layers.Flatten()(flattened_example_features[name]) - for name in sorted(flattened_example_features) - ] - input_layer = tf.concat(context_input + example_input, 1) - dnn = tf.keras.Sequential() - if hparams['use_batch_norm']: - dnn.add( - tf.keras.layers.BatchNormalization( - momentum=hparams['batch_norm_moment'])) - for layer_size in hparams['hidden_layer_dims']: - dnn.add(tf.keras.layers.Dense(units=layer_size)) - if hparams['use_batch_norm']: - dnn.add(tf.keras.layers.BatchNormalization( - momentum=hparams['batch_norm_moment'])) - dnn.add(tf.keras.layers.Activation(activation=tf.nn.relu)) - dnn.add(tf.keras.layers.Dropout(rate=hparams['dropout_rate'])) - - dnn.add(tf.keras.layers.Dense(units=1)) - - # Since argspec inspection is expensive, for keras layer, - # layer_obj._call_spec.arg_names is a property that uses cached argspec for - # call. We use this to determine whether the layer expects `inputs` as first - # argument. 
- restore_list = tfr.keras.layers.RestoreList() - - # TODO(kathywu): remove the except branch once changes to the call function - # args in the Keras Layer have been released. - try: - first_arg_name = flatten_list._call_spec.arg_names[0] # pylint: disable=protected-access - except AttributeError: - first_arg_name = flatten_list._call_fn_args[0] # pylint: disable=protected-access - if first_arg_name == 'inputs': - logits = restore_list(inputs=(dnn(input_layer), mask)) - else: - logits = restore_list(dnn(input_layer), mask) - - model = tf.keras.Model( - inputs={ - **context_keras_inputs, - **example_keras_inputs - }, - outputs=logits, - name='dnn_ranking_model') - model.compile( - optimizer=tf.keras.optimizers.Adagrad( - learning_rate=hparams['learning_rate']), - loss=tfr.keras.losses.get(hparams['loss']), - metrics=tfr.keras.metrics.default_keras_metrics()) - return model +# from tfx.orchestration import metadata +# from tfx.orchestration.beam.beam_dag_runner import BeamDagRunner + +# This is due to TF Ranking not supporting TensorFlow 2.16, We should re-enable it when support is added. +# from tfx.examples.ranking import ranking_pipeline + + +try: + import struct2tensor # pylint: disable=g-import-not-at-top +except ImportError: + struct2tensor = None + +import pytest + + +@pytest.mark.xfail(run=False, reason="PR 6889 This class contains tests that fail and needs to be fixed. " +"If all tests pass, please remove this mark.") +@pytest.mark.e2e +@unittest.skipIf(struct2tensor is None, + 'Cannot import required modules. 
This can happen when' + ' struct2tensor is not available.') +class RankingPipelineTest(tf.test.TestCase): + + def setUp(self): + super().setUp() + self._test_dir = os.path.join( + os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()), + self._testMethodName) + + self._pipeline_name = 'tf_ranking_test' + self._data_root = os.path.join(os.path.dirname(__file__), + 'testdata', 'input') + self._tfx_root = os.path.join(self._test_dir, 'tfx') + self._module_file = os.path.join(os.path.dirname(__file__), + 'ranking_utils.py') + self._serving_model_dir = os.path.join(self._test_dir, 'serving_model') + self._metadata_path = os.path.join(self._tfx_root, 'metadata', + self._pipeline_name, 'metadata.db') + print('TFX ROOT: ', self._tfx_root) + + def assertExecutedOnce(self, component) -> None: + """Check the component is executed exactly once.""" + component_path = os.path.join(self._pipeline_root, component) + self.assertTrue(tf.io.gfile.exists(component_path)) + outputs = tf.io.gfile.listdir(component_path) + for output in outputs: + execution = tf.io.gfile.listdir(os.path.join(component_path, output)) + self.assertEqual(1, len(execution)) + + #def testPipeline(self): + # BeamDagRunner().run( + # ranking_pipeline._create_pipeline( + # pipeline_name=self._pipeline_name, + # pipeline_root=self._tfx_root, + # data_root=self._data_root, + # module_file=self._module_file, + # serving_model_dir=self._serving_model_dir, + # metadata_path=self._metadata_path, + # beam_pipeline_args=['--direct_num_workers=1'])) + # self.assertTrue(tf.io.gfile.exists(self._serving_model_dir)) + # self.assertTrue(tf.io.gfile.exists(self._metadata_path)) + + # metadata_config = metadata.sqlite_metadata_connection_config( + # self._metadata_path) + # with metadata.Metadata(metadata_config) as m: + # artifact_count = len(m.store.get_artifacts()) + # execution_count = len(m.store.get_executions()) + # self.assertGreaterEqual(artifact_count, execution_count) + # self.assertEqual(9, 
execution_count) diff --git a/tfx/examples/ranking/struct2tensor_parsing_utils_test.py b/tfx/examples/ranking/struct2tensor_parsing_utils_test.py index f523ef1de7..2d2406012a 100644 --- a/tfx/examples/ranking/struct2tensor_parsing_utils_test.py +++ b/tfx/examples/ranking/struct2tensor_parsing_utils_test.py @@ -1,3 +1,4 @@ + # Copyright 2021 Google LLC. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -15,18 +16,18 @@ -import itertools -import unittest +# import unittest import tensorflow as tf from google.protobuf import text_format from tensorflow_serving.apis import input_pb2 -try: - from tfx.examples.ranking import struct2tensor_parsing_utils # pylint: disable=g-import-not-at-top -except ImportError: - struct2tensor_parsing_utils = None +#try: + # This is due to TF Ranking not supporting TensorFlow 2.16, We should re-enable it when support is added. + # from tfx.examples.ranking import struct2tensor_parsing_utils # pylint: disable=g-import-not-at-top +#except ImportError: + # struct2tensor_parsing_utils = None _ELWCS = [ @@ -171,82 +172,77 @@ ] -@unittest.skipIf(struct2tensor_parsing_utils is None, - 'Cannot import required modules. This can happen when' - ' struct2tensor is not available.') +# @unittest.skipIf(struct2tensor_parsing_utils is None, +# 'Cannot import required modules. 
This can happen when' +# ' struct2tensor is not available.') class ELWCDecoderTest(tf.test.TestCase): - - def testAllDTypes(self): - context_features = [ - struct2tensor_parsing_utils.Feature('ctx.int', tf.int64), - struct2tensor_parsing_utils.Feature('ctx.float', tf.float32), - struct2tensor_parsing_utils.Feature('ctx.bytes', tf.string), - ] - example_features = [ - struct2tensor_parsing_utils.Feature('example_int', tf.int64), - struct2tensor_parsing_utils.Feature('example_float', tf.float32), - struct2tensor_parsing_utils.Feature('example_bytes', tf.string), - ] - decoder = struct2tensor_parsing_utils.ELWCDecoder( - 'test_decoder', context_features, example_features, - size_feature_name=None, label_feature=None) - - result = decoder.decode_record(tf.convert_to_tensor(_ELWCS)) - self.assertLen(result, len(context_features) + len(example_features)) - for f in itertools.chain(context_features, example_features): - self.assertIn(f.name, result) - self.assertIsInstance(result[f.name], tf.RaggedTensor) - - expected = { - 'ctx.int': [[1, 2], [3]], - 'ctx.float': [[1.0, 2.0], [3.0]], - 'ctx.bytes': [[], [b'c']], - 'example_int': [[[11], [22]], [[33]]], - 'example_float': [[[11.0, 12.0], []], [[14.0, 15.0]]], - 'example_bytes': [[[b'u', b'v'], [b'w']], [[b'x', b'y', b'z']]], - } - self.assertEqual({k: v.to_list() for k, v in result.items()}, expected) - - def testDefaultFilling(self): - context_features = [ - struct2tensor_parsing_utils.Feature('ctx.bytes', tf.string, - default_value=b'g', length=1), - ] - example_features = [ - struct2tensor_parsing_utils.Feature('example_float', tf.float32, - default_value=-1.0, length=2), - ] - decoder = struct2tensor_parsing_utils.ELWCDecoder( - 'test_decoder', context_features, example_features, - size_feature_name=None, label_feature=None) - - result = decoder.decode_record(tf.convert_to_tensor(_ELWCS)) - self.assertLen(result, len(context_features) + len(example_features)) - for f in itertools.chain(context_features, 
example_features): - self.assertIn(f.name, result) - self.assertIsInstance(result[f.name], tf.RaggedTensor) - - expected = { - 'ctx.bytes': [[b'g'], [b'c']], - 'example_float': [[[11.0, 12.0], [-1.0, -1.0]], [[14.0, 15.0]]], - } - self.assertEqual({k: v.to_list() for k, v in result.items()}, expected) - - def testLabelFeature(self): - decoder = struct2tensor_parsing_utils.ELWCDecoder( - 'test_decoder', [], [], - size_feature_name=None, - label_feature=struct2tensor_parsing_utils.Feature( - 'example_int', tf.int64)) - result = decoder.decode_record(tf.convert_to_tensor(_ELWCS)) - - self.assertLen(result, 1) - self.assertEqual(result['example_int'].to_list(), [[11.0, 22.0], [33.0]]) - - def testSizeFeature(self): - decoder = struct2tensor_parsing_utils.ELWCDecoder( - 'test_decoder', [], [], - size_feature_name='example_list_size') - result = decoder.decode_record(tf.convert_to_tensor(_ELWCS)) - self.assertLen(result, 1) - self.assertEqual(result['example_list_size'].to_list(), [[2], [1]]) + pass # Added to prevent syntax error due to an empty class definition + #def testAllDTypes(self): + # context_features = [ + # struct2tensor_parsing_utils.Feature('ctx.int', tf.int64), + # struct2tensor_parsing_utils.Feature('ctx.float', tf.float32), + # struct2tensor_parsing_utils.Feature('ctx.bytes', tf.string), + # ] + # example_features = [ + # struct2tensor_parsing_utils.Feature('example_int', tf.int64), + # struct2tensor_parsing_utils.Feature('example_float', tf.float32), + # struct2tensor_parsing_utils.Feature('example_bytes', tf.string), + # ] + # decoder = struct2tensor_parsing_utils.ELWCDecoder( + # 'test_decoder', context_features, example_features, + # size_feature_name=None, label_feature=None) + + # result = decoder.decode_record(tf.convert_to_tensor(_ELWCS)) + # self.assertLen(result, len(context_features) + len(example_features)) + # for f in itertools.chain(context_features, example_features): + # self.assertIn(f.name, result) + # 
self.assertIsInstance(result[f.name], tf.RaggedTensor) + + # expected = { + # 'ctx.int': [[1, 2], [3]], + # 'ctx.float': [[1.0, 2.0], [3.0]], + # 'ctx.bytes': [[], [b'c']], + # 'example_int': [[[11], [22]], [[33]]], + # 'example_float': [[[11.0, 12.0], []], [[14.0, 15.0]]], + # 'example_bytes': [[[b'u', b'v'], [b'w']], [[b'x', b'y', b'z']]], + # } + # self.assertEqual({k: v.to_list() for k, v in result.items()}, expected) + # def testDefaultFilling(self): + # context_features = [ + # struct2tensor_parsing_utils.Feature('ctx.bytes', tf.string, + # default_value=b'g', length=1), + # ] + # example_features = [ + # struct2tensor_parsing_utils.Feature('example_float', tf.float32, + # default_value=-1.0, length=2), + # ] + # decoder = struct2tensor_parsing_utils.ELWCDecoder( + # 'test_decoder', context_features, example_features, + # size_feature_name=None, label_feature=None) + # result = decoder.decode_record(tf.convert_to_tensor(_ELWCS)) + # self.assertLen(result, len(context_features) + len(example_features)) + # for f in itertools.chain(context_features, example_features): + # self.assertIn(f.name, result) + # self.assertIsInstance(result[f.name], tf.RaggedTensor) + # expected = { + # 'ctx.bytes': [[b'g'], [b'c']], + # 'example_float': [[[11.0, 12.0], [-1.0, -1.0]], [[14.0, 15.0]]], + # } + # self.assertEqual({k: v.to_list() for k, v in result.items()}, expected) + # def testLabelFeature(self): + # decoder = struct2tensor_parsing_utils.ELWCDecoder( + # 'test_decoder', [], [], + # size_feature_name=None, + # label_feature=struct2tensor_parsing_utils.Feature( + # 'example_int', tf.int64)) + # result = decoder.decode_record(tf.convert_to_tensor(_ELWCS)) + + # self.assertLen(result, 1) + # self.assertEqual(result['example_int'].to_list(), [[11.0, 22.0], [33.0]]) + # def testSizeFeature(self): + # decoder = struct2tensor_parsing_utils.ELWCDecoder( + # 'test_decoder', [], [], + # size_feature_name='example_list_size') + # result = 
decoder.decode_record(tf.convert_to_tensor(_ELWCS)) + # self.assertLen(result, 1) + # self.assertEqual(result['example_list_size'].to_list(), [[2], [1]]) From da29117df340736894594bc05eee8e4d331cf6dc Mon Sep 17 00:00:00 2001 From: Doojin Park Date: Thu, 12 Dec 2024 11:46:04 +0900 Subject: [PATCH 4/4] Set bazel version for docker builder (#7732) --- tfx/tools/docker/Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/tfx/tools/docker/Dockerfile b/tfx/tools/docker/Dockerfile index 9fa9938175..3ae4f936a8 100644 --- a/tfx/tools/docker/Dockerfile +++ b/tfx/tools/docker/Dockerfile @@ -26,6 +26,7 @@ WORKDIR ${TFX_DIR} # Specify what version of dependent libraries will be used. See dependencies.py. ARG TFX_DEPENDENCY_SELECTOR ENV TFX_DEPENDENCY_SELECTOR=${TFX_DEPENDENCY_SELECTOR} +ENV USE_BAZEL_VERSION=6.5.0 RUN python -m pip install --upgrade pip wheel setuptools tomli