diff --git a/RELEASE.md b/RELEASE.md index ccdf1d931d..4045140f64 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -1,3 +1,19 @@ +# Release 2.15.1 + +## Breaking Changes + +- TensorBoard.dev is shutting down. See the FAQ at https://tensorboard.dev. + - It is no longer possible to upload new data to TensorBoard.dev; the `tensorboard dev upload` command will fail. (#6638) + - The experimental DataFrame API has been deleted. (#6644) + +## Bug Fixes + +- Time Series dashboard: + - Sort run names with leading numbers differently. (#6664) + - Show scrollbar in runs table only when needed. (#6656) + - Fix 'Prev' and 'Next' buttons in dark mode. (#6663) + - Improve loading/reloading behavior for runs table. (#6658) + # Release 2.15.0 The 2.15 minor series tracks TensorFlow 2.15. diff --git a/tensorboard/BUILD b/tensorboard/BUILD index d03a9e1452..7c8c1252df 100644 --- a/tensorboard/BUILD +++ b/tensorboard/BUILD @@ -89,7 +89,6 @@ py_library( ":lib_init_only", ":notebook", ":program", - "//tensorboard/data:lib_init_only", "//tensorboard/summary", "//tensorboard/summary:summary_v1", "//tensorboard/summary:summary_v2", diff --git a/tensorboard/__init__.py b/tensorboard/__init__.py index 0275d36555..95bc174e64 100644 --- a/tensorboard/__init__.py +++ b/tensorboard/__init__.py @@ -71,13 +71,6 @@ # additional discussion. -@_lazy.lazy_load("tensorboard.data") -def data(): - import importlib - - return importlib.import_module("tensorboard.data") - - @_lazy.lazy_load("tensorboard.errors") def errors(): import importlib diff --git a/tensorboard/data/BUILD b/tensorboard/data/BUILD index 22154fd938..0a3337af1a 100644 --- a/tensorboard/data/BUILD +++ b/tensorboard/data/BUILD @@ -8,16 +8,6 @@ package(default_visibility = ["//tensorboard:internal"]) licenses(["notice"]) -py_library( - name = "lib_init_only", - srcs = ["__init__.py"], - srcs_version = "PY3", - visibility = ["//tensorboard:internal"], - deps = [ - "//tensorboard/data/experimental:lib_init_only", - ], -) - py_library( name = "provider", srcs = ["provider.py"], diff --git a/tensorboard/data/__init__.py b/tensorboard/data/__init__.py index dfcbc38f92..931c2ef11d 100644 --- a/tensorboard/data/__init__.py +++ b/tensorboard/data/__init__.py @@ -12,6 +12,3 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== - - -from tensorboard.data import experimental # noqa: F401 diff --git a/tensorboard/data/experimental/BUILD b/tensorboard/data/experimental/BUILD index 8d49db14e5..da1d82e6a2 100644 --- a/tensorboard/data/experimental/BUILD +++ b/tensorboard/data/experimental/BUILD @@ -1,78 +1,7 @@ -# Description: -# Experiment Data Access API. -load("@rules_python//python:py_binary.bzl", "py_binary") -load("@rules_python//python:py_library.bzl", "py_library") -load("@rules_python//python:py_test.bzl", "py_test") - +# This is a stub BUILD file that remains after the deletion of the experimental +# DataFrame API. We keep it (temporarily) to allow copybara imports to succeed.
package(default_visibility = ["//tensorboard:internal"]) licenses(["notice"]) exports_files(["LICENSE"]) - -py_library( - name = "base_experiment", - srcs = ["base_experiment.py"], - srcs_version = "PY3", -) - -py_library( - name = "experiment_from_dev", - srcs = ["experiment_from_dev.py"], - srcs_version = "PY3", - deps = [ - ":base_experiment", - ":utils", - "//tensorboard:expect_grpc_installed", - "//tensorboard:expect_pandas_installed", - "//tensorboard/uploader:auth", - "//tensorboard/uploader:server_info", - "//tensorboard/uploader:util", - "//tensorboard/uploader/proto:protos_all_py_pb2", - "//tensorboard/uploader/proto:protos_all_py_pb2_grpc", - "//tensorboard/util:grpc_util", - ], -) - -py_test( - name = "experiment_from_dev_test", - srcs = ["experiment_from_dev_test.py"], - srcs_version = "PY3", - deps = [ - ":experiment_from_dev", - "//tensorboard:expect_numpy_installed", - "//tensorboard:expect_pandas_installed", - "//tensorboard:test", - "//tensorboard/compat/proto:protos_all_py_pb2", - "//tensorboard/uploader:test_util", - "//tensorboard/uploader/proto:protos_all_py_pb2", - "//tensorboard/util:grpc_util", - ], -) - -py_library( - name = "lib_init_only", - srcs = ["__init__.py"], - srcs_version = "PY3", - visibility = ["//tensorboard:internal"], - deps = [ - ":experiment_from_dev", - ], -) - -py_binary( - name = "test_binary", - srcs = ["test_binary.py"], - srcs_version = "PY3", - deps = ["//tensorboard/data/experimental:experiment_from_dev"], -) - -py_library( - name = "utils", - srcs = ["utils.py"], - srcs_version = "PY3", - visibility = ["//tensorboard:internal"], - deps = [ - "//tensorboard:expect_numpy_installed", - ], -) diff --git a/tensorboard/data/experimental/__init__.py b/tensorboard/data/experimental/__init__.py deleted file mode 100644 index 3257ac3e28..0000000000 --- a/tensorboard/data/experimental/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - - -from tensorboard.data.experimental.experiment_from_dev import ( # noqa: F401 - ExperimentFromDev, -) diff --git a/tensorboard/data/experimental/base_experiment.py b/tensorboard/data/experimental/base_experiment.py deleted file mode 100644 index eb6399673f..0000000000 --- a/tensorboard/data/experimental/base_experiment.py +++ /dev/null @@ -1,76 +0,0 @@ -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Base Class of Experiment Data Access API.""" - - -import abc - - -class BaseExperiment(metaclass=abc.ABCMeta): - """Base class for experiment data access.""" - - # TODO(cais): Add list_scalar_runs(). - # TODO(cais): Add list_scalar_tags(). - - @abc.abstractmethod - def get_scalars( - self, - runs_filter=None, - tags_filter=None, - pivot=False, - include_wall_time=False, - ): - """Export scalar data as a pandas.DataFrame. - - Args: - runs_filter: A regex filter for runs (e.g., r'run_[2-4]'). Operates in - logical AND relation with `tags_filter`. - tags_filter: A regex filter for tags (e.g., r'.*loss.*'). Operates in - logical AND related with `runs_filter`. - pivot: Whether to returned DataFrame will be pivoted (via pandas’ - `pivot_data()` method to a “wide” format wherein the tags of a - given run and a given step are all collected in a single row. - Setting `pivot` to `True` stipulates that the sets of step values - are identical among all tags in every run of the experiment (after - any run and tag filtering), so that the pivoting operation will not - introduce missing values in the resultant DataFrame. Failing to meet - this condition will cause `pivot=True` to raise a `ValueError`. - If not provided, defaults to `False`. - include_wall_time: Include wall_time (timestamps in nanoseconds since - the epoch in float64) as a column in the returned DataFrame. - If not provided, defaults to `False`. - - Returns: - If `pivot` (default): - A pivoted DataFrame with the indexing columns of - - run - - step - And value columns that correspond to the tags. - Duplicate entries for each run-step combination will be aggregated - with `numpy.stack`. This format is more friendly to manipulation and - plotting and hence io chosen as the default. When certain rows have - missing values, a warning message will be displayed and advise the - user to use the `pivot=False` if steps have different meanings in - the experiment. - If `not pivot`: - A DataFrame with the following columns. - - run: (non-null object) - - tag: (non-null object) - - steps: (non-null int64) - - wall_time: (non-null object) - - value: (non-null float32) - """ - # TODO(cais): Add description about sorting order. - pass diff --git a/tensorboard/data/experimental/experiment_from_dev.py b/tensorboard/data/experimental/experiment_from_dev.py deleted file mode 100644 index 70bd98efdf..0000000000 --- a/tensorboard/data/experimental/experiment_from_dev.py +++ /dev/null @@ -1,159 +0,0 @@ -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Experiment Data Access API for tensorboard.dev.""" - - -import sys -import time - -import grpc - -from tensorboard.data.experimental import base_experiment -from tensorboard.data.experimental import utils as experimental_utils -from tensorboard.uploader import auth -from tensorboard.uploader import util -from tensorboard.uploader import server_info as server_info_lib -from tensorboard.uploader.proto import export_service_pb2 -from tensorboard.uploader.proto import export_service_pb2_grpc -from tensorboard.uploader.proto import server_info_pb2 -from tensorboard.util import grpc_util - - -DEFAULT_ORIGIN = "https://tensorboard.dev" - - -def import_pandas(): - """Import pandas, guarded by a user-friendly error message on failure.""" - try: - import pandas - except ImportError: - raise ImportError( - "The get_scalars() feature requires the pandas package, " - "which does not seem to be available in your Python " - "environment. You can install it with command:\n\n" - " pip install pandas\n" - ) - return pandas - - -class ExperimentFromDev(base_experiment.BaseExperiment): - """Implementation of BaseExperiment, specialized for tensorboard.dev.""" - - def __init__(self, experiment_id, api_endpoint=None): - """Constructor of ExperimentFromDev. - - Args: - experiment_id: String ID of the experiment on tensorboard.dev (e.g., - "AdYd1TgeTlaLWXx6I8JUbA"). - api_endpoint: Optional override value for API endpoint. Used for - development only. - """ - super().__init__() - self._experiment_id = experiment_id - self._api_client = get_api_client(api_endpoint=api_endpoint) - - def get_scalars( - self, - runs_filter=None, - tags_filter=None, - pivot=False, - include_wall_time=False, - ): - # NOTE(#3650): Import pandas early in this method, so if the - # Python environment does not have pandas installed, an error can be - # raised early, before any rpc call is made. - pandas = import_pandas() - if runs_filter is not None: - raise NotImplementedError( - "runs_filter support for get_scalars() is not implemented yet." - ) - if tags_filter is not None: - raise NotImplementedError( - "tags_filter support for get_scalars() is not implemented yet." - ) - - request = export_service_pb2.StreamExperimentDataRequest() - request.experiment_id = self._experiment_id - read_time = time.time() - util.set_timestamp(request.read_timestamp, read_time) - # TODO(cais, wchargin): Use another rpc to check for staleness and avoid - # a new StreamExperimentData rpc request if data is not stale. - stream = self._api_client.StreamExperimentData( - request, metadata=grpc_util.version_metadata() - ) - - runs = [] - tags = [] - steps = [] - wall_times = [] - values = [] - for response in stream: - # TODO(cais, wchargin): Display progress bar during data loading. 
- num_values = len(response.points.values) - runs.extend([response.run_name] * num_values) - tags.extend([response.tag_name] * num_values) - steps.extend(list(response.points.steps)) - wall_times.extend( - [t.ToNanoseconds() / 1e9 for t in response.points.wall_times] - ) - values.extend(list(response.points.values)) - - data = { - "run": runs, - "tag": tags, - "step": steps, - "value": values, - } - if include_wall_time: - data["wall_time"] = wall_times - dataframe = pandas.DataFrame(data) - if pivot: - dataframe = experimental_utils.pivot_dataframe(dataframe) - return dataframe - - -def get_api_client(api_endpoint=None): - server_info = _get_server_info(api_endpoint=api_endpoint) - _handle_server_info(server_info) - channel_creds = grpc.ssl_channel_credentials() - credentials = auth.CredentialsStore().read_credentials() - if credentials: - channel_creds = grpc.composite_channel_credentials( - channel_creds, auth.id_token_call_credentials(credentials) - ) - channel = grpc.secure_channel( - server_info.api_server.endpoint, channel_creds - ) - return export_service_pb2_grpc.TensorBoardExporterServiceStub(channel) - - -def _get_server_info(api_endpoint=None): - # TODO(cais): Add more plugins to the list when more plugin/data types - # are supported - plugins = ["scalars"] - if api_endpoint: - return server_info_lib.create_server_info( - DEFAULT_ORIGIN, api_endpoint, plugins - ) - return server_info_lib.fetch_server_info(DEFAULT_ORIGIN, plugins) - - -def _handle_server_info(info): - compat = info.compatibility - if compat.verdict == server_info_pb2.VERDICT_WARN: - sys.stderr.write("Warning [from server]: %s\n" % compat.details) - sys.stderr.flush() - elif compat.verdict == server_info_pb2.VERDICT_ERROR: - raise ValueError("Error [from server]: %s" % compat.details) diff --git a/tensorboard/data/experimental/experiment_from_dev_test.py b/tensorboard/data/experimental/experiment_from_dev_test.py deleted file mode 100644 index 13f5ce2de9..0000000000 --- a/tensorboard/data/experimental/experiment_from_dev_test.py +++ /dev/null @@ -1,250 +0,0 @@ -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Tests for tensorboard.uploader.exporter.""" - - -from unittest import mock - -import numpy as np -import pandas - -from tensorboard import test as tb_test -from tensorboard.data.experimental import experiment_from_dev -from tensorboard.uploader import test_util -from tensorboard.uploader.proto import export_service_pb2 -from tensorboard.util import grpc_util - - -class ExperimentFromDevTest(tb_test.TestCase): - def test_get_scalars_works(self): - mock_api_client = mock.Mock() - - def stream_experiment_data(request, **kwargs): - self.assertEqual(request.experiment_id, "789") - self.assertEqual(kwargs["metadata"], grpc_util.version_metadata()) - for run in ("train", "test"): - for tag in ("accuracy", "loss"): - response = export_service_pb2.StreamExperimentDataResponse() - response.run_name = run - response.tag_name = tag - display_name = "%s:%s" % (request.experiment_id, tag) - response.tag_metadata.CopyFrom( - test_util.scalar_metadata(display_name) - ) - for step in range(10): - response.points.steps.append(step) - if tag == "loss": - if run == "train": - value = 1.0 / (step + 1) - seconds = step - else: - value = -1.0 / (step + 1) - seconds = 600 + step - else: # "accuracy" - if run == "train": - value = 1.0 / (10 - step) - seconds = step * 2 - else: - value = -1.0 / (10 - step) - seconds = 600 + step * 2 - response.points.values.append(value) - response.points.wall_times.add(seconds=seconds, nanos=0) - yield response - - mock_api_client.StreamExperimentData = mock.Mock( - wraps=stream_experiment_data - ) - - with mock.patch.object( - experiment_from_dev, - "get_api_client", - lambda api_endpoint: mock_api_client, - ): - experiment = experiment_from_dev.ExperimentFromDev("789") - for pivot in (False, True): - for include_wall_time in (False, True): - with self.subTest( - "pivot=%s; include_wall_time=%s" - % (pivot, include_wall_time) - ): - dataframe = experiment.get_scalars( - pivot=pivot, include_wall_time=include_wall_time - ) - - if pivot: - run_key = ( - ("run", "") if include_wall_time else "run" - ) - step_key = ( - ("step", "") if include_wall_time else "step" - ) - accuracy_value_key = ( - ("value", "accuracy") - if include_wall_time - else "accuracy" - ) - loss_value_key = ( - ("value", "loss") - if include_wall_time - else "loss" - ) - data = { - run_key: ["test"] * 10 + ["train"] * 10, - step_key: np.concatenate( - [np.arange(0, 10), np.arange(0, 10)] - ), - accuracy_value_key: np.concatenate( - [ - -1.0 / (10.0 - np.arange(0, 10)), - 1.0 / (10.0 - np.arange(0, 10)), - ], - ), - loss_value_key: np.concatenate( - [ - -1.0 / (1.0 + np.arange(0, 10)), - 1.0 / (1.0 + np.arange(0, 10)), - ], - ), - } - if include_wall_time: - data[ - ("wall_time", "accuracy") - ] = np.concatenate( - [ - 600.0 + 2.0 * np.arange(0, 10), - 2.0 * np.arange(0, 10), - ] - ) - data[("wall_time", "loss")] = np.concatenate( - [ - 600.0 + np.arange(0, 10), - 1.0 * np.arange(0, 10), - ] - ) - expected = pandas.DataFrame(data) - else: # No pivot_table. 
- data = { - "run": ["train"] * 20 + ["test"] * 20, - "tag": (["accuracy"] * 10 + ["loss"] * 10) * 2, - "step": list(np.arange(0, 10)) * 4, - "value": np.concatenate( - [ - 1.0 / (10.0 - np.arange(0, 10)), - 1.0 / (1.0 + np.arange(0, 10)), - -1.0 / (10.0 - np.arange(0, 10)), - -1.0 / (1.0 + np.arange(0, 10)), - ] - ), - } - if include_wall_time: - data["wall_time"] = np.concatenate( - [ - 2.0 * np.arange(0, 10), - 1.0 * np.arange(0, 10), - 600.0 + 2.0 * np.arange(0, 10), - 600.0 + np.arange(0, 10), - ] - ) - expected = pandas.DataFrame(data) - - pandas.testing.assert_frame_equal( - dataframe, - expected, - check_names=True, - ) - - def test_get_scalars_with_pivot_table_with_missing_value(self): - mock_api_client = mock.Mock() - - def stream_experiment_data(request, **kwargs): - self.assertEqual(request.experiment_id, "789") - self.assertEqual(kwargs["metadata"], grpc_util.version_metadata()) - response = export_service_pb2.StreamExperimentDataResponse() - response.run_name = "train" - response.tag_name = "batch_loss" - response.points.steps.append(0) - response.points.values.append(0.5) - response.points.wall_times.add(seconds=0, nanos=0) - response.points.steps.append(1) - response.points.values.append(0.25) - response.points.wall_times.add(seconds=1, nanos=0) - yield response - response = export_service_pb2.StreamExperimentDataResponse() - response.run_name = "train" - response.tag_name = "epoch_loss" - response.points.steps.append(0) - response.points.values.append(0.375) - response.points.wall_times.add(seconds=2, nanos=0) - yield response - - mock_api_client.StreamExperimentData = mock.Mock( - wraps=stream_experiment_data - ) - - with mock.patch.object( - experiment_from_dev, - "get_api_client", - lambda api_endpoint: mock_api_client, - ): - experiment = experiment_from_dev.ExperimentFromDev("789") - with self.assertRaisesRegex( - ValueError, - r"contains missing value\(s\).*different sets of " - r"steps.*pivot=False", - ): - experiment.get_scalars(pivot=True) - - def test_get_scalars_with_actual_inf_and_nan(self): - """Test for get_scalars() call that involve inf and nan in user data.""" - mock_api_client = mock.Mock() - - def stream_experiment_data(request, **kwargs): - self.assertEqual(request.experiment_id, "789") - self.assertEqual(kwargs["metadata"], grpc_util.version_metadata()) - response = export_service_pb2.StreamExperimentDataResponse() - response.run_name = "train" - response.tag_name = "batch_loss" - response.points.steps.append(0) - response.points.values.append(np.nan) - response.points.wall_times.add(seconds=0, nanos=0) - response.points.steps.append(1) - response.points.values.append(np.inf) - response.points.wall_times.add(seconds=10, nanos=0) - yield response - - mock_api_client.StreamExperimentData = mock.Mock( - wraps=stream_experiment_data - ) - - with mock.patch.object( - experiment_from_dev, - "get_api_client", - lambda api_endpoint: mock_api_client, - ): - experiment = experiment_from_dev.ExperimentFromDev("789") - dataframe = experiment.get_scalars(pivot=True) - - expected = pandas.DataFrame( - { - "run": ["train"] * 2, - "step": [0, 1], - "batch_loss": [np.nan, np.inf], - } - ) - pandas.testing.assert_frame_equal(dataframe, expected, check_names=True) - - -if __name__ == "__main__": - tb_test.main() diff --git a/tensorboard/data/experimental/test_binary.py b/tensorboard/data/experimental/test_binary.py deleted file mode 100644 index 2ec9e57cc2..0000000000 --- a/tensorboard/data/experimental/test_binary.py +++ /dev/null @@ -1,62 +0,0 @@ -# Copyright 2020 The 
TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""A test binary that can be used to test ExperimentFromDev features.""" - - -import argparse - -from tensorboard.data.experimental import experiment_from_dev - - -def parse_args(): - parser = argparse.ArgumentParser("Test run of ExperimentFromDev") - parser.add_argument( - "--experiment_id", - type=str, - default="AdYd1TgeTlaLWXx6I8JUbA", - help="Experiment ID", - ) - parser.add_argument( - "--api_endpoint", - type=str, - default=None, - help="Optional API endpoint used to override the default", - ) - parser.add_argument( - "--pivot", - action="store_true", - help="Pivot the DataFrame, so that the tags become columns " - "of the DataFrame.", - ) - parser.add_argument( - "--include_wall_time", - action="store_true", - help="Include wall_time column(s) in the DataFrame", - ) - return parser.parse_args() - - -def main(args): - experiment = experiment_from_dev.ExperimentFromDev( - args.experiment_id, api_endpoint=args.api_endpoint - ) - dataframe = experiment.get_scalars( - pivot=args.pivot, include_wall_time=args.include_wall_time - ) - print(dataframe) - - -if __name__ == "__main__": - main(parse_args()) diff --git a/tensorboard/data/experimental/utils.py b/tensorboard/data/experimental/utils.py deleted file mode 100644 index a093e17ad6..0000000000 --- a/tensorboard/data/experimental/utils.py +++ /dev/null @@ -1,64 +0,0 @@ -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Utility methods for working with the Experiment Data Access API.""" - -import numpy as np - - -def pivot_dataframe(dataframe): - """Gets a pivoted wide-form pandas dataframe. - - The wide-form DataFrame has all its tags included as columns of the - DataFrame, which is more convenient to work. If the condition of having - uniform sets of step values across all tags in all runs is not met, - this will error. - - Args: - dataframe: pandas dataframe to pivot. - - Returns: - Pivoted wide-form pandas dataframe. - Raises: - ValueError if step values across all tags are not uniform. 
- """ - num_missing_0 = np.count_nonzero(dataframe.isnull().values) - dataframe = dataframe.pivot_table( - values=( - ["value", "wall_time"] - if "wall_time" in dataframe.columns - else "value" - ), - index=["run", "step"], - columns="tag", - dropna=False, - ) - num_missing_1 = np.count_nonzero(dataframe.isnull().values) - if num_missing_1 > num_missing_0: - raise ValueError( - "pivoted DataFrame contains missing value(s). " - "This is likely due to two timeseries having different " - "sets of steps in your experiment. " - "You can avoid this error by calling `get_scalars()` with " - "`pivot=False` to disable the DataFrame pivoting." - ) - # `reset_index()` removes the MultiIndex structure of the pivoted - # DataFrame. Before the call, the DataFrame consits of two levels - # of index: "run" and "step". After the call, the index become a - # single range index (e.g,. `dataframe[:2]` works). - dataframe = dataframe.reset_index() - # Remove the columns name "tag". - dataframe.columns.name = None - dataframe.columns.names = [None for name in dataframe.columns.names] - return dataframe diff --git a/tensorboard/pip_package/BUILD b/tensorboard/pip_package/BUILD index 8baa65b0fe..9e71cad7ef 100644 --- a/tensorboard/pip_package/BUILD +++ b/tensorboard/pip_package/BUILD @@ -42,7 +42,6 @@ sh_binary( "//tensorboard", # Main tensorboard binary and everything it uses "//tensorboard:lib", # User-facing overall TensorBoard API "//tensorboard:version", # Version module (read by setup.py) - "//tensorboard/data/experimental:experiment_from_dev", "//tensorboard/plugins/hparams", # User-facing hparams API "//tensorboard/plugins/mesh", # User-facing mesh API "//tensorboard/plugins/projector", # User-facing projector API diff --git a/tensorboard/uploader/BUILD b/tensorboard/uploader/BUILD index 12a365df15..a038c6a814 100644 --- a/tensorboard/uploader/BUILD +++ b/tensorboard/uploader/BUILD @@ -68,7 +68,6 @@ py_library( visibility = ["//tensorboard:internal"], deps = [ ":auth", - ":dry_run_stubs", ":exporter", ":flags_parser", ":formatters", @@ -79,7 +78,6 @@ py_library( "//tensorboard:expect_absl_logging_installed", "//tensorboard:expect_grpc_installed", "//tensorboard:program", - "//tensorboard/compat:tensorflow", "//tensorboard/plugins:base_plugin", "//tensorboard/uploader/proto:protos_all_py_pb2_grpc", ], @@ -90,7 +88,6 @@ py_test( srcs = ["uploader_subcommand_test.py"], srcs_version = "PY3", deps = [ - ":dry_run_stubs", ":server_info", ":uploader", ":uploader_subcommand", @@ -104,21 +101,11 @@ py_library( srcs = ["uploader.py"], srcs_version = "PY3", deps = [ - ":logdir_loader", - ":upload_tracker", ":util", "//tensorboard:expect_grpc_installed", - "//tensorboard:expect_protobuf_installed", - "//tensorboard/backend:process_graph", - "//tensorboard/backend/event_processing:directory_loader", - "//tensorboard/backend/event_processing:event_file_loader", - "//tensorboard/backend/event_processing:io_wrapper", - "//tensorboard/compat/proto:protos_all_py_pb2", - "//tensorboard/plugins/graph:metadata", "//tensorboard/uploader/proto:protos_all_py_pb2", "//tensorboard/util:grpc_util", "//tensorboard/util:tb_logging", - "//tensorboard/util:tensor_util", ], ) @@ -133,28 +120,15 @@ py_test( srcs = ["uploader_test.py"], srcs_version = "PY3", deps = [ - ":dry_run_stubs", ":server_info", ":test_util", - ":upload_tracker", ":uploader", - ":util", - "//tensorboard:data_compat", - "//tensorboard:dataclass_compat", "//tensorboard:expect_grpc_installed", "//tensorboard:expect_grpc_testing_installed", - 
"//tensorboard:expect_protobuf_installed", "//tensorboard:expect_tensorflow_installed", "//tensorboard/compat:no_tensorflow", - "//tensorboard/compat/proto:protos_all_py_pb2", - "//tensorboard/plugins/graph:metadata", - "//tensorboard/plugins/histogram:summary_v2", - "//tensorboard/plugins/scalar:metadata", - "//tensorboard/plugins/scalar:summary_v2", - "//tensorboard/summary:summary_v1", "//tensorboard/uploader/proto:protos_all_py_pb2", "//tensorboard/uploader/proto:protos_all_py_pb2_grpc", - "//tensorboard/util:test_util", ], ) @@ -168,26 +142,6 @@ py_test( ], ) -py_library( - name = "dry_run_stubs", - srcs = ["dry_run_stubs.py"], - srcs_version = "PY3", - deps = [ - "//tensorboard/uploader/proto:protos_all_py_pb2", - ], -) - -py_test( - name = "dry_run_stubs_test", - srcs = ["dry_run_stubs_test.py"], - srcs_version = "PY3", - deps = [ - ":dry_run_stubs", - "//tensorboard:test", - "//tensorboard/uploader/proto:protos_all_py_pb2", - ], -) - py_library( name = "auth", srcs = ["auth.py"], @@ -257,8 +211,8 @@ py_test( name = "util_test", srcs = ["util_test.py"], deps = [ - ":test_util", ":util", + "//tensorboard:expect_grpc_installed", "//tensorboard:expect_protobuf_installed", "//tensorboard:test", ], diff --git a/tensorboard/uploader/dry_run_stubs.py b/tensorboard/uploader/dry_run_stubs.py deleted file mode 100644 index 7303cfc34d..0000000000 --- a/tensorboard/uploader/dry_run_stubs.py +++ /dev/null @@ -1,53 +0,0 @@ -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Dry-run stubs for various rpc services.""" - - -from tensorboard.uploader.proto import write_service_pb2 - - -class DryRunTensorBoardWriterStub: - """A dry-run TensorBoardWriter gRPC Server. - - Only the methods used by the `tensorboard dev upload` are - mocked out in this class. - - When additional methods start to be used by the command, - their mocks should be added to this class. - """ - - def CreateExperiment(self, request, **kwargs): - """Create a new experiment and remember it has been created.""" - del request, kwargs # Unused. - return write_service_pb2.CreateExperimentResponse() - - def WriteScalar(self, request, **kwargs): - del request, kwargs # Unused. - return write_service_pb2.WriteScalarResponse() - - def WriteTensor(self, request, **kwargs): - del request, kwargs # Unused. - return write_service_pb2.WriteTensorResponse() - - def GetOrCreateBlobSequence(self, request, **kwargs): - del request, kwargs # Unused. - return write_service_pb2.GetOrCreateBlobSequenceResponse( - blob_sequence_id="dummy_blob_sequence_id" - ) - - def WriteBlob(self, request, **kwargs): - del kwargs # Unused. 
- for item in request: - yield write_service_pb2.WriteBlobResponse() diff --git a/tensorboard/uploader/dry_run_stubs_test.py b/tensorboard/uploader/dry_run_stubs_test.py deleted file mode 100644 index 50e7f4d2c0..0000000000 --- a/tensorboard/uploader/dry_run_stubs_test.py +++ /dev/null @@ -1,52 +0,0 @@ -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for dry-run rpc servicers.""" - - -from tensorboard import test as tb_test -from tensorboard.uploader import dry_run_stubs -from tensorboard.uploader.proto import write_service_pb2 - - -class DryRunTensorBoardWriterServicerTest(tb_test.TestCase): - def setUp(self): - super().setUp() - self._stub = dry_run_stubs.DryRunTensorBoardWriterStub() - - def testCreateExperiment(self): - self._stub.CreateExperiment(write_service_pb2.CreateExperimentRequest()) - - def testWriteScalar(self): - self._stub.WriteScalar(write_service_pb2.WriteScalarRequest()) - - def testWriteTensor(self): - self._stub.WriteTensor(write_service_pb2.WriteTensorRequest()) - - def testGetOrCreateBlobSequence(self): - self._stub.GetOrCreateBlobSequence( - write_service_pb2.GetOrCreateBlobSequenceRequest() - ) - - def testWriteBlob(self): - def dummy_iterator(): - yield write_service_pb2.WriteBlobRequest() - yield write_service_pb2.WriteBlobRequest() - - for response in self._stub.WriteBlob(dummy_iterator()): - self.assertTrue(response) - - -if __name__ == "__main__": - tb_test.main() diff --git a/tensorboard/uploader/uploader.py b/tensorboard/uploader/uploader.py index 3654524a9e..79a689e7d1 100644 --- a/tensorboard/uploader/uploader.py +++ b/tensorboard/uploader/uploader.py @@ -15,28 +15,11 @@ """Uploads a TensorBoard logdir to TensorBoard.dev.""" -import contextlib -import functools -import time - import grpc -from google.protobuf import message -from tensorboard.compat.proto import graph_pb2 -from tensorboard.compat.proto import summary_pb2 -from tensorboard.compat.proto import types_pb2 from tensorboard.uploader.proto import write_service_pb2 -from tensorboard.uploader import logdir_loader -from tensorboard.uploader import upload_tracker -from tensorboard.uploader import util -from tensorboard.backend import process_graph -from tensorboard.backend.event_processing import directory_loader -from tensorboard.backend.event_processing import event_file_loader -from tensorboard.backend.event_processing import io_wrapper -from tensorboard.plugins.graph import metadata as graphs_metadata from tensorboard.util import grpc_util from tensorboard.util import tb_logging -from tensorboard.util import tensor_util # Minimum length of a logdir polling cycle in seconds. 
Shorter cycles will # sleep to avoid spinning over the logdir, which isn't great for disks and can @@ -55,179 +38,6 @@ logger = tb_logging.get_logger() -class TensorBoardUploader: - """Uploads a TensorBoard logdir to TensorBoard.dev.""" - - def __init__( - self, - writer_client, - logdir, - allowed_plugins, - upload_limits, - logdir_poll_rate_limiter=None, - rpc_rate_limiter=None, - tensor_rpc_rate_limiter=None, - blob_rpc_rate_limiter=None, - name=None, - description=None, - verbosity=None, - one_shot=None, - ): - """Constructs a TensorBoardUploader. - - Args: - writer_client: a TensorBoardWriterService stub instance - logdir: path of the log directory to upload - allowed_plugins: collection of string plugin names; events will only - be uploaded if their time series's metadata specifies one of these - plugin names - upload_limits: instance of tensorboard.service.UploadLimits proto. - logdir_poll_rate_limiter: a `RateLimiter` to use to limit logdir - polling frequency, to avoid thrashing disks, especially on networked - file systems - rpc_rate_limiter: a `RateLimiter` to use to limit write RPC frequency. - Note this limit applies at the level of single RPCs in the Scalar - and Tensor case, but at the level of an entire blob upload in the - Blob case-- which may require a few preparatory RPCs and a stream - of chunks. Note the chunk stream is internally rate-limited by - backpressure from the server, so it is not a concern that we do not - explicitly rate-limit within the stream here. - name: String name to assign to the experiment. - description: String description to assign to the experiment. - verbosity: Level of verbosity, an integer. Supported value: - 0 - No upload statistics is printed. - 1 - Print upload statistics while uploading data (default). - one_shot: Once uploading starts, upload only the existing data in - the logdir and then return immediately, instead of the default - behavior of continuing to listen for new data in the logdir and - upload them when it appears. 
- """ - self._api = writer_client - self._logdir = logdir - self._allowed_plugins = frozenset(allowed_plugins) - self._upload_limits = upload_limits - - self._name = name - self._description = description - self._verbosity = 1 if verbosity is None else verbosity - self._one_shot = False if one_shot is None else one_shot - self._request_sender = None - self._experiment_id = None - if logdir_poll_rate_limiter is None: - self._logdir_poll_rate_limiter = util.RateLimiter( - _MIN_LOGDIR_POLL_INTERVAL_SECS - ) - else: - self._logdir_poll_rate_limiter = logdir_poll_rate_limiter - - if rpc_rate_limiter is None: - self._rpc_rate_limiter = util.RateLimiter( - self._upload_limits.min_scalar_request_interval / 1000 - ) - else: - self._rpc_rate_limiter = rpc_rate_limiter - - if tensor_rpc_rate_limiter is None: - self._tensor_rpc_rate_limiter = util.RateLimiter( - self._upload_limits.min_tensor_request_interval / 1000 - ) - else: - self._tensor_rpc_rate_limiter = tensor_rpc_rate_limiter - - if blob_rpc_rate_limiter is None: - self._blob_rpc_rate_limiter = util.RateLimiter( - self._upload_limits.min_blob_request_interval / 1000 - ) - else: - self._blob_rpc_rate_limiter = blob_rpc_rate_limiter - - active_filter = ( - lambda secs: secs + _EVENT_FILE_INACTIVE_SECS >= time.time() - ) - directory_loader_factory = functools.partial( - directory_loader.DirectoryLoader, - loader_factory=event_file_loader.TimestampedEventFileLoader, - path_filter=io_wrapper.IsTensorFlowEventsFile, - active_filter=active_filter, - ) - self._logdir_loader = logdir_loader.LogdirLoader( - self._logdir, directory_loader_factory - ) - self._tracker = upload_tracker.UploadTracker( - verbosity=self._verbosity, one_shot=self._one_shot - ) - - def has_data(self) -> bool: - """Returns this object's upload tracker.""" - return self._tracker.has_data() - - @property - def experiment_id(self) -> str: - """Returns the experiment_id associated with this uploader. - - May be none if no experiment is set, for instance, if - `create_experiment` has not been called. - """ - return self._experiment_id - - def create_experiment(self): - """Creates an Experiment for this upload session and returns the ID.""" - logger.info("Creating experiment") - request = write_service_pb2.CreateExperimentRequest( - name=self._name, description=self._description - ) - response = grpc_util.call_with_retries( - self._api.CreateExperiment, request - ) - self._request_sender = _BatchedRequestSender( - response.experiment_id, - self._api, - allowed_plugins=self._allowed_plugins, - upload_limits=self._upload_limits, - rpc_rate_limiter=self._rpc_rate_limiter, - tensor_rpc_rate_limiter=self._tensor_rpc_rate_limiter, - blob_rpc_rate_limiter=self._blob_rpc_rate_limiter, - tracker=self._tracker, - ) - self._experiment_id = response.experiment_id - return response.experiment_id - - def start_uploading(self): - """Uploads data from the logdir. - - This will continuously scan the logdir, uploading as data is added - unless the uploader was built with the _one_shot option, in which - case it will terminate after the first scan. - - Raises: - RuntimeError: If `create_experiment` has not yet been called. - ExperimentNotFoundError: If the experiment is deleted during the - course of the upload. 
- """ - if self._request_sender is None: - raise RuntimeError( - "Must call create_experiment() before start_uploading()" - ) - while True: - self._logdir_poll_rate_limiter.tick() - self._upload_once() - if self._one_shot: - break - - def _upload_once(self): - """Runs one upload cycle, sending zero or more RPCs.""" - logger.info("Starting an upload cycle") - - sync_start_time = time.time() - self._logdir_loader.synchronize_runs() - sync_duration_secs = time.time() - sync_start_time - logger.info("Logdir sync took %.3f seconds", sync_duration_secs) - - run_to_events = self._logdir_loader.get_run_events() - with self._tracker.send_tracker(): - self._request_sender.send_requests(run_to_events) - - def update_experiment_metadata( writer_client, experiment_id, name=None, description=None ): @@ -308,923 +118,3 @@ class ExperimentNotFoundError(RuntimeError): class PermissionDeniedError(RuntimeError): pass - - -class _OutOfSpaceError(Exception): - """Action could not proceed without overflowing request budget. - - This is a signaling exception (like `StopIteration`) used internally - by `_*RequestSender`; it does not mean that anything has gone wrong. - """ - - pass - - -class _BatchedRequestSender: - """Helper class for building requests that fit under a size limit. - - This class maintains stateful request builders for each of the possible - request types (scalars, tensors, and blobs). These accumulate batches - independently, each maintaining its own byte budget and emitting a request - when the batch becomes full. As a consequence, events of different types - will likely be sent to the backend out of order. E.g., in the extreme case, - a single tensor-flavored request may be sent only when the event stream is - exhausted, even though many more recent scalar events were sent earlier. - - This class is not threadsafe. Use external synchronization if - calling its methods concurrently. - """ - - def __init__( - self, - experiment_id, - api, - allowed_plugins, - upload_limits, - rpc_rate_limiter, - tensor_rpc_rate_limiter, - blob_rpc_rate_limiter, - tracker, - ): - # Map from `(run_name, tag_name)` to `SummaryMetadata` if the time - # series is a scalar time series, else to `_NON_SCALAR_TIME_SERIES`. - self._tag_metadata = {} - self._allowed_plugins = frozenset(allowed_plugins) - self._tracker = tracker - self._scalar_request_sender = _ScalarBatchedRequestSender( - experiment_id, - api, - rpc_rate_limiter, - upload_limits.max_scalar_request_size, - tracker=self._tracker, - ) - self._tensor_request_sender = _TensorBatchedRequestSender( - experiment_id, - api, - tensor_rpc_rate_limiter, - upload_limits.max_tensor_request_size, - upload_limits.max_tensor_point_size, - tracker=self._tracker, - ) - self._blob_request_sender = _BlobRequestSender( - experiment_id, - api, - blob_rpc_rate_limiter, - upload_limits.max_blob_request_size, - upload_limits.max_blob_size, - tracker=self._tracker, - ) - self._tracker = tracker - - def send_requests(self, run_to_events): - """Accepts a stream of TF events and sends batched write RPCs. - - Each sent request will be batched, the size of each batch depending on - the type of data (Scalar vs Tensor vs Blob) being sent. - - Args: - run_to_events: Mapping from run name to generator of `tf.Event` - values, as returned by `LogdirLoader.get_run_events`. - - Raises: - RuntimeError: If no progress can be made because even a single - point is too large (say, due to a gigabyte-long tag name). 
- """ - - for (run_name, event, value) in self._run_values(run_to_events): - time_series_key = (run_name, value.tag) - - # The metadata for a time series is memorized on the first event. - # If later events arrive with a mismatching plugin_name, they are - # ignored with a warning. - metadata = self._tag_metadata.get(time_series_key) - first_in_time_series = False - if metadata is None: - first_in_time_series = True - metadata = value.metadata - self._tag_metadata[time_series_key] = metadata - - plugin_name = metadata.plugin_data.plugin_name - # TODO(cais): Call self._tracker.add_plugin_name() to track the - # data for what plugins have been uploaded. - if value.HasField("metadata") and ( - plugin_name != value.metadata.plugin_data.plugin_name - ): - logger.warning( - "Mismatching plugin names for %s. Expected %s, found %s.", - time_series_key, - metadata.plugin_data.plugin_name, - value.metadata.plugin_data.plugin_name, - ) - continue - if plugin_name not in self._allowed_plugins: - if first_in_time_series: - logger.info( - "Skipping time series %r with unsupported plugin name %r", - time_series_key, - plugin_name, - ) - continue - - if metadata.data_class == summary_pb2.DATA_CLASS_SCALAR: - self._scalar_request_sender.add_event( - run_name, event, value, metadata - ) - elif metadata.data_class == summary_pb2.DATA_CLASS_TENSOR: - self._tensor_request_sender.add_event( - run_name, event, value, metadata - ) - elif metadata.data_class == summary_pb2.DATA_CLASS_BLOB_SEQUENCE: - self._blob_request_sender.add_event( - run_name, event, value, metadata - ) - - self._scalar_request_sender.flush() - self._tensor_request_sender.flush() - self._blob_request_sender.flush() - - def _run_values(self, run_to_events): - """Helper generator to create a single stream of work items. - - Note that `dataclass_compat` may emit multiple variants of - the same event, for backwards compatibility. Thus this stream should - be filtered to obtain the desired version of each event. Here, we - ignore any event that does not have a `summary` field. - - Furthermore, the events emitted here could contain values that do not - have `metadata.data_class` set; these too should be ignored. In - `_send_summary_value(...)` above, we switch on `metadata.data_class` - and drop any values with an unknown (i.e., absent or unrecognized) - `data_class`. - """ - # Note that this join in principle has deletion anomalies: if the input - # stream contains runs with no events, or events with no values, we'll - # lose that information. This is not a problem: we would need to prune - # such data from the request anyway. - for (run_name, events) in run_to_events.items(): - for event in events: - _filter_graph_defs(event) - for value in event.summary.value: - yield (run_name, event, value) - - -class _ScalarBatchedRequestSender: - """Helper class for building requests that fit under a size limit. - - This class accumulates a current request. `add_event(...)` may or may not - send the request (and start a new one). After all `add_event(...)` calls - are complete, a final call to `flush()` is needed to send the final request. - - This class is not threadsafe. Use external synchronization if calling its - methods concurrently. 
- """ - - def __init__( - self, - experiment_id, - api, - rpc_rate_limiter, - max_request_size, - tracker, - ): - if experiment_id is None: - raise ValueError("experiment_id cannot be None") - self._experiment_id = experiment_id - self._api = api - self._rpc_rate_limiter = rpc_rate_limiter - self._byte_budget_manager = _ByteBudgetManager(max_request_size) - self._tracker = tracker - - self._runs = {} # cache: map from run name to `Run` proto in request - self._tags = ( - {} - ) # cache: map from `(run, tag)` to `Tag` proto in run in request - self._new_request() - - def _new_request(self): - """Allocates a new request and refreshes the budget.""" - self._request = write_service_pb2.WriteScalarRequest() - self._runs.clear() - self._tags.clear() - self._num_values = 0 - self._request.experiment_id = self._experiment_id - self._byte_budget_manager.reset(self._request) - - def add_event(self, run_name, event, value, metadata): - """Attempts to add the given event to the current request. - - If the event cannot be added to the current request because the byte - budget is exhausted, the request is flushed, and the event is added - to the next request. - """ - try: - self._add_event_internal(run_name, event, value, metadata) - except _OutOfSpaceError: - self.flush() - # Try again. This attempt should never produce OutOfSpaceError - # because we just flushed. - try: - self._add_event_internal(run_name, event, value, metadata) - except _OutOfSpaceError: - raise RuntimeError("add_event failed despite flush") - - def _add_event_internal(self, run_name, event, value, metadata): - run_proto = self._runs.get(run_name) - if run_proto is None: - run_proto = self._create_run(run_name) - self._runs[run_name] = run_proto - tag_proto = self._tags.get((run_name, value.tag)) - if tag_proto is None: - tag_proto = self._create_tag(run_proto, value.tag, metadata) - self._tags[(run_name, value.tag)] = tag_proto - self._create_point(tag_proto, event, value) - self._num_values += 1 - - def flush(self): - """Sends the active request after removing empty runs and tags. - - Starts a new, empty active request. - """ - request = self._request - _prune_empty_tags_and_runs(request) - if not request.runs: - return - - self._rpc_rate_limiter.tick() - - with _request_logger( - request, request.runs - ), self._tracker.scalars_tracker(self._num_values): - try: - # TODO(@nfelt): execute this RPC asynchronously. - grpc_util.call_with_retries(self._api.WriteScalar, request) - except grpc.RpcError as e: - if e.code() == grpc.StatusCode.NOT_FOUND: - raise ExperimentNotFoundError() - logger.error("Upload call failed with error %s", e) - - self._new_request() - - def _create_run(self, run_name): - """Adds a run to the live request, if there's space. - - Args: - run_name: String name of the run to add. - - Returns: - The `WriteScalarRequest.Run` that was added to `request.runs`. - - Raises: - _OutOfSpaceError: If adding the run would exceed the remaining - request budget. - """ - run_proto = self._request.runs.add(name=run_name) - self._byte_budget_manager.add_run(run_proto) - return run_proto - - def _create_tag(self, run_proto, tag_name, metadata): - """Adds a tag for the given value, if there's space. - - Args: - run_proto: `WriteScalarRequest.Run` proto to which to add a tag. - tag_name: String name of the tag to add (as `value.tag`). - metadata: TensorBoard `SummaryMetadata` proto from the first - occurrence of this time series. - - Returns: - The `WriteScalarRequest.Tag` that was added to `run_proto.tags`. 
- - Raises: - _OutOfSpaceError: If adding the tag would exceed the remaining - request budget. - """ - tag_proto = run_proto.tags.add(name=tag_name) - tag_proto.metadata.CopyFrom(metadata) - self._byte_budget_manager.add_tag(tag_proto) - return tag_proto - - def _create_point(self, tag_proto, event, value): - """Adds a scalar point to the given tag, if there's space. - - Args: - tag_proto: `WriteScalarRequest.Tag` proto to which to add a point. - event: Enclosing `Event` proto with the step and wall time data. - value: Scalar `Summary.Value` proto with the actual scalar data. - - Raises: - _OutOfSpaceError: If adding the point would exceed the remaining - request budget. - """ - point = tag_proto.points.add() - point.step = event.step - # TODO(@nfelt): skip tensor roundtrip for Value with simple_value set - point.value = tensor_util.make_ndarray(value.tensor).item() - util.set_timestamp(point.wall_time, event.wall_time) - try: - self._byte_budget_manager.add_point(point) - except _OutOfSpaceError: - tag_proto.points.pop() - raise - - -class _TensorBatchedRequestSender: - """Helper class for building WriteTensor() requests that fit under a size limit. - - This class accumulates a current request. `add_event(...)` may or may not - send the request (and start a new one). After all `add_event(...)` calls - are complete, a final call to `flush()` is needed to send the final request. - - This class is not threadsafe. Use external synchronization if calling its - methods concurrently. - """ - - def __init__( - self, - experiment_id, - api, - rpc_rate_limiter, - max_request_size, - max_tensor_point_size, - tracker, - ): - if experiment_id is None: - raise ValueError("experiment_id cannot be None") - self._experiment_id = experiment_id - self._api = api - self._rpc_rate_limiter = rpc_rate_limiter - self._byte_budget_manager = _ByteBudgetManager(max_request_size) - self._max_tensor_point_size = max_tensor_point_size - self._tracker = tracker - - self._runs = {} # cache: map from run name to `Run` proto in request - self._tags = ( - {} - ) # cache: map from `(run, tag)` to `Tag` proto in run in request - self._new_request() - - def _new_request(self): - """Allocates a new request and refreshes the budget.""" - - self._request = write_service_pb2.WriteTensorRequest() - self._runs.clear() - self._tags.clear() - self._request.experiment_id = self._experiment_id - self._byte_budget_manager.reset(self._request) - self._num_values = 0 - self._num_values_skipped = 0 - self._tensor_bytes = 0 - self._tensor_bytes_skipped = 0 - - def add_event(self, run_name, event, value, metadata): - """Attempts to add the given event to the current request. - - If the event cannot be added to the current request because the byte - budget is exhausted, the request is flushed, and the event is added - to the next request. - """ - try: - self._add_event_internal(run_name, event, value, metadata) - except _OutOfSpaceError: - self.flush() - # Try again. This attempt should never produce OutOfSpaceError - # because we just flushed. 
- try: - self._add_event_internal(run_name, event, value, metadata) - except _OutOfSpaceError: - raise RuntimeError("add_event failed despite flush") - - def _add_event_internal(self, run_name, event, value, metadata): - run_proto = self._runs.get(run_name) - if run_proto is None: - run_proto = self._create_run(run_name) - self._runs[run_name] = run_proto - tag_proto = self._tags.get((run_name, value.tag)) - if tag_proto is None: - tag_proto = self._create_tag(run_proto, value.tag, metadata) - self._tags[(run_name, value.tag)] = tag_proto - self._create_point(tag_proto, event, value, run_name) - self._num_values += 1 - - def flush(self): - """Sends the active request after removing empty runs and tags. - - Starts a new, empty active request. - """ - request = self._request - _prune_empty_tags_and_runs(request) - if not request.runs: - return - - self._rpc_rate_limiter.tick() - - with _request_logger(request, request.runs): - with self._tracker.tensors_tracker( - self._num_values, - self._num_values_skipped, - self._tensor_bytes, - self._tensor_bytes_skipped, - ): - try: - grpc_util.call_with_retries(self._api.WriteTensor, request) - except grpc.RpcError as e: - if e.code() == grpc.StatusCode.NOT_FOUND: - raise ExperimentNotFoundError() - logger.error("Upload call failed with error %s", e) - - self._new_request() - - def _create_run(self, run_name): - """Adds a run to the live request, if there's space. - - Args: - run_name: String name of the run to add. - - Returns: - The `WriteTensorRequest.Run` that was added to `request.runs`. - - Raises: - _OutOfSpaceError: If adding the run would exceed the remaining - request budget. - """ - run_proto = self._request.runs.add(name=run_name) - self._byte_budget_manager.add_run(run_proto) - return run_proto - - def _create_tag(self, run_proto, tag_name, metadata): - """Adds a tag for the given value, if there's space. - - Args: - run_proto: `WriteTensorRequest.Run` proto to which to add a tag. - tag_name: String name of the tag to add (as `value.tag`). - metadata: TensorBoard `SummaryMetadata` proto from the first - occurrence of this time series. - - Returns: - The `WriteTensorRequest.Tag` that was added to `run_proto.tags`. - - Raises: - _OutOfSpaceError: If adding the tag would exceed the remaining - request budget. - """ - tag_proto = run_proto.tags.add(name=tag_name) - tag_proto.metadata.CopyFrom(metadata) - self._byte_budget_manager.add_tag(tag_proto) - return tag_proto - - def _create_point(self, tag_proto, event, value, run_name): - """Adds a tensor point to the given tag, if there's space. - - Args: - tag_proto: `WriteTensorRequest.Tag` proto to which to add a point. - event: Enclosing `Event` proto with the step and wall time data. - value: Tensor `Summary.Value` proto with the actual tensor data. - run_name: Name of the wrong, only used for error reporting. - - Raises: - _OutOfSpaceError: If adding the point would exceed the remaining - request budget. - """ - point = tag_proto.points.add() - point.step = event.step - point.value.CopyFrom(value.tensor) - util.set_timestamp(point.wall_time, event.wall_time) - - self._tensor_bytes += point.value.ByteSize() - if point.value.ByteSize() > self._max_tensor_point_size: - logger.warning( - "Tensor (run:%s, tag:%s, step: %d) too large; skipping. 
" - "Size %d exceeds limit of %d bytes.", - run_name, - tag_proto.name, - event.step, - point.value.ByteSize(), - self._max_tensor_point_size, - ) - tag_proto.points.pop() - self._num_values_skipped += 1 - self._tensor_bytes_skipped += point.value.ByteSize() - return - - self._validate_tensor_value( - value.tensor, value.tag, event.step, event.wall_time - ) - - try: - self._byte_budget_manager.add_point(point) - except _OutOfSpaceError: - tag_proto.points.pop() - raise - - def _validate_tensor_value(self, tensor_proto, tag, step, wall_time): - """Validate a TensorProto by attempting to parse it.""" - try: - tensor_util.make_ndarray(tensor_proto) - except ValueError as error: - raise ValueError( - "The uploader failed to upload a tensor. This seems to be " - "due to a malformation in the tensor, which may be caused by " - "a bug in the process that wrote the tensor.\n\n" - "The tensor has tag '%s' and is at step %d and wall_time %.6f.\n\n" - "Original error:\n%s" % (tag, step, wall_time, error) - ) - - -class _ByteBudgetManager: - """Helper class for managing the request byte budget for certain RPCs. - - This should be used for RPCs that organize data by Runs, Tags, and Points, - specifically WriteScalar and WriteTensor. - - Any call to add_run(), add_tag(), or add_point() may raise an - _OutOfSpaceError, which is non-fatal. It signals to the caller that they - should flush the current request and begin a new one. - - For more information on the protocol buffer encoding and how byte cost - can be calculated, visit: - - https://developers.google.com/protocol-buffers/docs/encoding - """ - - def __init__(self, max_bytes): - # The remaining number of bytes that we may yet add to the request. - self._byte_budget = None # type: int - self._max_bytes = max_bytes - - def reset(self, base_request): - """Resets the byte budget and calculates the cost of the base request. - - Args: - base_request: Base request. - - Raises: - _OutOfSpaceError: If the size of the request exceeds the entire - request byte budget. - """ - self._byte_budget = self._max_bytes - self._byte_budget -= base_request.ByteSize() - if self._byte_budget < 0: - raise RuntimeError("Byte budget too small for base request") - - def add_run(self, run_proto): - """Integrates the cost of a run proto into the byte budget. - - Args: - run_proto: The proto representing a run. - - Raises: - _OutOfSpaceError: If adding the run would exceed the remaining request - budget. - """ - cost = ( - # The size of the run proto without any tag fields set. - run_proto.ByteSize() - # The size of the varint that describes the length of the run - # proto. We can't yet know the final size of the run proto -- we - # haven't yet set any tag or point values -- so we can't know the - # final size of this length varint. We conservatively assume it is - # maximum size. - + _MAX_VARINT64_LENGTH_BYTES - # The size of the proto key. - + 1 - ) - if cost > self._byte_budget: - raise _OutOfSpaceError() - self._byte_budget -= cost - - def add_tag(self, tag_proto): - """Integrates the cost of a tag proto into the byte budget. - - Args: - tag_proto: The proto representing a tag. - - Raises: - _OutOfSpaceError: If adding the tag would exceed the remaining request - budget. - """ - cost = ( - # The size of the tag proto without any tag fields set. - tag_proto.ByteSize() - # The size of the varint that describes the length of the tag - # proto. 
We can't yet know the final size of the tag proto -- we - # haven't yet set any point values -- so we can't know the final - # size of this length varint. We conservatively assume it is maximum - # size. - + _MAX_VARINT64_LENGTH_BYTES - # The size of the proto key. - + 1 - ) - if cost > self._byte_budget: - raise _OutOfSpaceError() - self._byte_budget -= cost - - def add_point(self, point_proto): - """Integrates the cost of a point proto into the byte budget. - - Args: - point_proto: The proto representing a point. - - Raises: - _OutOfSpaceError: If adding the point would exceed the remaining request - budget. - """ - submessage_cost = point_proto.ByteSize() - cost = ( - # The size of the point proto. - submessage_cost - # The size of the varint that describes the length of the point - # proto. - + _varint_cost(submessage_cost) - # The size of the proto key. - + 1 - ) - if cost > self._byte_budget: - raise _OutOfSpaceError() - self._byte_budget -= cost - - -class _BlobRequestSender: - """Uploader for blob-type event data. - - Unlike the other types, this class does not accumulate events in batches; - every blob is sent individually and immediately. Nonetheless we retain - the `add_event()`/`flush()` structure for symmetry. - - This class is not threadsafe. Use external synchronization if calling its - methods concurrently. - """ - - def __init__( - self, - experiment_id, - api, - rpc_rate_limiter, - max_blob_request_size, - max_blob_size, - tracker, - ): - if experiment_id is None: - raise ValueError("experiment_id cannot be None") - self._experiment_id = experiment_id - self._api = api - self._rpc_rate_limiter = rpc_rate_limiter - self._max_blob_request_size = max_blob_request_size - self._max_blob_size = max_blob_size - self._tracker = tracker - - # Start in the empty state, just like self._new_request(). - self._run_name = None - self._event = None - self._value = None - self._metadata = None - - def _new_request(self): - """Declares the previous event complete.""" - self._run_name = None - self._event = None - self._value = None - self._metadata = None - - def add_event( - self, - run_name, - event, - value, - metadata, - ): - """Attempts to add the given event to the current request. - - If the event cannot be added to the current request because the byte - budget is exhausted, the request is flushed, and the event is added - to the next request. - """ - if self._value: - raise RuntimeError("Tried to send blob while another is pending") - self._run_name = run_name - self._event = event # provides step and possibly plugin_name - self._value = value - # TODO(soergel): should we really unpack the tensor here, or ship - # it wholesale and unpack server side, or something else? - # TODO(soergel): can we extract the proto fields directly instead? - self._blobs = tensor_util.make_ndarray(self._value.tensor) - if self._blobs.ndim == 1: - self._metadata = metadata - self.flush() - else: - logger.warning( - "A blob sequence must be represented as a rank-1 Tensor. " - "Provided data has rank %d, for run %s, tag %s, step %s ('%s' plugin) .", - self._blobs.ndim, - run_name, - self._value.tag, - self._event.step, - metadata.plugin_data.plugin_name, - ) - # Skip this upload. 
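- # (Illustrative only: a well-formed blob sequence arrives as a rank-1
- # tensor of serialized bytes, e.g.
- #   tensor_util.make_ndarray(tensor_util.make_tensor_proto([b"a", b"b"]))
- # has ndim == 1, and flush() below sends one blob per element.)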
- self._new_request() - - def flush(self): - """Sends the current blob sequence fully, and clears it to make way for the next.""" - if self._value: - blob_sequence_id = self._get_or_create_blob_sequence() - logger.info( - "Sending %d blobs for sequence id: %s", - len(self._blobs), - blob_sequence_id, - ) - - sent_blobs = 0 - for seq_index, blob in enumerate(self._blobs): - # Note the _send_blob() stream is internally flow-controlled. - # This rate limit applies to *starting* the stream. - self._rpc_rate_limiter.tick() - with self._tracker.blob_tracker(len(blob)) as blob_tracker: - sent_blobs += self._send_blob( - blob_sequence_id, seq_index, blob - ) - blob_tracker.mark_uploaded(bool(sent_blobs)) - - logger.info( - "Sent %d of %d blobs for sequence id: %s", - sent_blobs, - len(self._blobs), - blob_sequence_id, - ) - - self._new_request() - - def _get_or_create_blob_sequence(self): - request = write_service_pb2.GetOrCreateBlobSequenceRequest( - experiment_id=self._experiment_id, - run=self._run_name, - tag=self._value.tag, - step=self._event.step, - final_sequence_length=len(self._blobs), - metadata=self._metadata, - ) - util.set_timestamp(request.wall_time, self._event.wall_time) - with _request_logger(request): - try: - # TODO(@nfelt): execute this RPC asynchronously. - response = grpc_util.call_with_retries( - self._api.GetOrCreateBlobSequence, request - ) - blob_sequence_id = response.blob_sequence_id - except grpc.RpcError as e: - if e.code() == grpc.StatusCode.NOT_FOUND: - raise ExperimentNotFoundError() - logger.error("Upload call failed with error %s", e) - # TODO(soergel): clean up - raise - - return blob_sequence_id - - def _send_blob(self, blob_sequence_id, seq_index, blob): - """Tries to send a single blob for a given index within a blob sequence. - - The blob will not be sent if it was sent already, or if it is too large. - - Returns: - The number of blobs successfully sent (i.e., 1 or 0). - """ - # TODO(soergel): retry and resume logic - - if len(blob) > self._max_blob_size: - logger.warning( - "Blob too large; skipping. Size %d exceeds limit of %d bytes.", - len(blob), - self._max_blob_size, - ) - return 0 - - request_iterator = self._write_blob_request_iterator( - blob_sequence_id, seq_index, blob - ) - upload_start_time = time.time() - count = 0 - # TODO(soergel): don't wait for responses for greater throughput - # See https://stackoverflow.com/questions/55029342/handling-async-streaming-request-in-grpc-python - try: - for response in self._api.WriteBlob(request_iterator): - count += 1 - # TODO(soergel): validate responses? probably not. - pass - upload_duration_secs = time.time() - upload_start_time - logger.info( - "Upload for %d chunks totaling %d bytes took %.3f seconds (%.3f MB/sec)", - count, - len(blob), - upload_duration_secs, - len(blob) / upload_duration_secs / (1024 * 1024), - ) - return 1 - except grpc.RpcError as e: - if e.code() == grpc.StatusCode.ALREADY_EXISTS: - logger.error("Attempted to re-upload existing blob. Skipping.") - return 0 - else: - logger.info("WriteBlob RPC call got error %s", e) - raise - - def _write_blob_request_iterator(self, blob_sequence_id, seq_index, blob): - # For now all use cases have the blob in memory already. - # In the future we may want to stream from disk; that will require - # refactoring here. - # TODO(soergel): compute crc32c's to allow server-side data validation. 
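- # (For example, with max_blob_request_size=100 a 250-byte blob yields
- # three WriteBlobRequests at offsets 0, 100, and 200, carrying 100, 100,
- # and 50 bytes respectively; only the last one sets finalize_object=True.)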
- for offset in range(0, len(blob), self._max_blob_request_size): - chunk = blob[offset : offset + self._max_blob_request_size] - finalize_object = offset + self._max_blob_request_size >= len(blob) - request = write_service_pb2.WriteBlobRequest( - blob_sequence_id=blob_sequence_id, - index=seq_index, - data=chunk, - offset=offset, - crc32c=None, - finalize_object=finalize_object, - final_crc32c=None, - blob_bytes=len(blob), - ) - yield request - - -@contextlib.contextmanager -def _request_logger(request, runs=None): - upload_start_time = time.time() - request_bytes = request.ByteSize() - logger.info("Trying request of %d bytes", request_bytes) - yield - upload_duration_secs = time.time() - upload_start_time - if runs: - logger.info( - "Upload for %d runs (%d bytes) took %.3f seconds", - len(runs), - request_bytes, - upload_duration_secs, - ) - else: - logger.info( - "Upload of (%d bytes) took %.3f seconds", - request_bytes, - upload_duration_secs, - ) - - -def _varint_cost(n): - """Computes the size of `n` encoded as an unsigned base-128 varint. - - This should be consistent with the proto wire format: - - - Args: - n: A non-negative integer. - - Returns: - An integer number of bytes. - """ - result = 1 - while n >= 128: - result += 1 - n >>= 7 - return result - - -def _prune_empty_tags_and_runs(request): - for (run_idx, run) in reversed(list(enumerate(request.runs))): - for (tag_idx, tag) in reversed(list(enumerate(run.tags))): - if not tag.points: - del run.tags[tag_idx] - if not run.tags: - del request.runs[run_idx] - - -def _filter_graph_defs(event): - for v in event.summary.value: - if v.metadata.plugin_data.plugin_name != graphs_metadata.PLUGIN_NAME: - continue - if v.tag == graphs_metadata.RUN_GRAPH_NAME: - data = list(v.tensor.string_val) - filtered_data = [_filtered_graph_bytes(x) for x in data] - filtered_data = [x for x in filtered_data if x is not None] - if filtered_data != data: - new_tensor = tensor_util.make_tensor_proto( - filtered_data, dtype=types_pb2.DT_STRING - ) - v.tensor.CopyFrom(new_tensor) - - -def _filtered_graph_bytes(graph_bytes): - try: - graph_def = graph_pb2.GraphDef().FromString(graph_bytes) - # The reason for the RuntimeWarning catch here is b/27494216, whereby - # some proto parsers incorrectly raise that instead of DecodeError - # on certain kinds of malformed input. Triggering this seems to require - # a combination of mysterious circumstances. - except (message.DecodeError, RuntimeWarning): - logger.warning( - "Could not parse GraphDef of size %d. 
Skipping.", - len(graph_bytes), - ) - return None - # Use the default filter parameters: - # limit_attr_size=1024, large_attrs_key="_too_large_attrs" - process_graph.prepare_graph_for_ui(graph_def) - return graph_def.SerializeToString() diff --git a/tensorboard/uploader/uploader_subcommand.py b/tensorboard/uploader/uploader_subcommand.py index 64ec292bfe..bd4bcaf4a0 100644 --- a/tensorboard/uploader/uploader_subcommand.py +++ b/tensorboard/uploader/uploader_subcommand.py @@ -16,19 +16,16 @@ import abc -import os import sys import textwrap from absl import logging import grpc -from tensorboard.compat import tf from tensorboard.uploader.proto import experiment_pb2 from tensorboard.uploader.proto import export_service_pb2_grpc from tensorboard.uploader.proto import write_service_pb2_grpc from tensorboard.uploader import auth -from tensorboard.uploader import dry_run_stubs from tensorboard.uploader import exporter as exporter_lib from tensorboard.uploader import flags_parser from tensorboard.uploader import formatters @@ -59,7 +56,7 @@ def _prompt_for_user_ack(intent): """Prompts for user consent, exiting the program if they decline.""" body = intent.get_ack_message_body() - header = "\n***** TensorBoard Uploader *****\n" + header = "\n***** TensorBoard.dev Uploader *****\n" user_ack_message = "\n".join((header, body, _MESSAGE_TOS)) sys.stderr.write(user_ack_message) sys.stderr.write("\n") @@ -87,6 +84,31 @@ def _run(flags, experiment_url_callback=None): sys.stderr.write("Logged out of uploader.\n") sys.stderr.flush() return + if isinstance(intent, UploadIntent): + sys.stderr.write( + textwrap.dedent( + """\ + **************************************************************** + **************************************************************** + **************************************************************** + + Uploading TensorBoard logs to https://tensorboard.dev/ is no longer + supported. + + TensorBoard.dev is shutting down. + + Please export your experiments by Dec 31, 2023. + + See the FAQ at https://tensorboard.dev. + + **************************************************************** + **************************************************************** + **************************************************************** + """ + ) + ) + sys.stderr.flush() + return # TODO(b/141723268): maybe reconfirm Google Account prior to reuse. credentials = store.read_credentials() if not credentials: @@ -397,115 +419,25 @@ def _die_if_bad_experiment_description(description): class UploadIntent(_Intent): - """The user intends to upload an experiment from the given logdir.""" + """The user intends to upload an experiment from the given logdir. - _MESSAGE_TEMPLATE = textwrap.dedent( - """\ - This will upload your TensorBoard logs to https://tensorboard.dev/ from - the following directory: + However, TensorBoard.dev is being turned down and we no longer allow + upload. + """ - {logdir} + def get_ack_message_body(self): + """Does nothing. - This TensorBoard will be visible to everyone. Do not upload sensitive - data. + Uploading is no longer supported and is handled specially by main. 
""" - ) - - def __init__( - self, - logdir, - name=None, - description=None, - verbosity=None, - dry_run=None, - one_shot=None, - experiment_url_callback=None, - ): - self.logdir = logdir - self.name = name - self.description = description - self.verbosity = verbosity - self.dry_run = False if dry_run is None else dry_run - self.one_shot = False if one_shot is None else one_shot - self.experiment_url_callback = experiment_url_callback - - def get_ack_message_body(self): - return self._MESSAGE_TEMPLATE.format(logdir=self.logdir) + return "" def execute(self, server_info, channel): - if self.dry_run: - api_client = dry_run_stubs.DryRunTensorBoardWriterStub() - else: - api_client = write_service_pb2_grpc.TensorBoardWriterServiceStub( - channel - ) - _die_if_bad_experiment_name(self.name) - _die_if_bad_experiment_description(self.description) - uploader = uploader_lib.TensorBoardUploader( - api_client, - self.logdir, - allowed_plugins=server_info_lib.allowed_plugins(server_info), - upload_limits=server_info_lib.upload_limits(server_info), - name=self.name, - description=self.description, - verbosity=self.verbosity, - one_shot=self.one_shot, - ) - if self.one_shot and not tf.io.gfile.isdir(self.logdir): - print("%s: No such directory." % self.logdir) - print( - "User specified `one_shot` mode with an unavailable " - "logdir. Exiting without creating an experiment." - ) - return - experiment_id = uploader.create_experiment() - url = server_info_lib.experiment_url(server_info, experiment_id) - if self.experiment_url_callback is not None: - self.experiment_url_callback(url) - if not self.one_shot: - print( - "Upload started and will continue reading any new data as it's " - "added to the logdir.\n\nTo stop uploading, press Ctrl-C." - ) - if self.dry_run: - print( - "\n** This is a dry run. " - "No data will be sent to tensorboard.dev. **\n" - ) - else: - print( - "\nNew experiment created. View your TensorBoard at: %s\n" % url - ) - interrupted = False - try: - uploader.start_uploading() - except uploader_lib.ExperimentNotFoundError: - print("Experiment was deleted; uploading has been cancelled") - return - except KeyboardInterrupt: - interrupted = True - finally: - if self.one_shot and not uploader.has_data(): - print( - "TensorBoard was run in `one_shot` mode, but did not find " - "any uploadable data in the specified logdir: %s\n" - "An empty experiment was created. " - "To delete the empty experiment you can execute the " - "following\n\n" - " tensorboard dev delete --experiment_id=%s" - % (self.logdir, uploader.experiment_id) - ) - end_message = "\n\n" - if interrupted: - end_message += "Interrupted." - else: - end_message += "Done." - # Only Add the "View your TensorBoard" message if there was any - # data added at all. - if not self.dry_run and uploader.has_data(): - end_message += " View your TensorBoard at %s" % url - sys.stdout.write(end_message + "\n") - sys.stdout.flush() + """Does nothing. + + Uploading is no longer supported and is handled specially by main. 
+ """ + pass class _ExportIntent(_Intent): @@ -575,20 +507,8 @@ def _get_intent(flags, experiment_url_callback=None): if cmd is None: raise base_plugin.FlagsError("Must specify subcommand (try --help).") if cmd == flags_parser.SUBCOMMAND_KEY_UPLOAD: - if flags.logdir: - return UploadIntent( - os.path.expanduser(flags.logdir), - name=flags.name, - description=flags.description, - verbosity=flags.verbose, - dry_run=flags.dry_run, - one_shot=flags.one_shot, - experiment_url_callback=experiment_url_callback, - ) - else: - raise base_plugin.FlagsError( - "Must specify directory to upload via `--logdir`." - ) + return UploadIntent() + if cmd == flags_parser.SUBCOMMAND_KEY_UPDATE_METADATA: if flags.experiment_id: if flags.name is not None or flags.description is not None: diff --git a/tensorboard/uploader/uploader_subcommand_test.py b/tensorboard/uploader/uploader_subcommand_test.py index 347b807b10..1e4a065e08 100644 --- a/tensorboard/uploader/uploader_subcommand_test.py +++ b/tensorboard/uploader/uploader_subcommand_test.py @@ -23,189 +23,14 @@ from tensorboard.uploader.proto import experiment_pb2 from tensorboard.uploader.proto import server_info_pb2 -from tensorboard.uploader.proto import write_service_pb2 from tensorboard.uploader.proto import write_service_pb2_grpc -from tensorboard.uploader import dry_run_stubs from tensorboard.uploader import exporter as exporter_lib from tensorboard.uploader import uploader as uploader_lib from tensorboard.uploader import uploader_subcommand -from tensorboard.plugins.histogram import metadata as histograms_metadata -from tensorboard.plugins.graph import metadata as graphs_metadata -from tensorboard.plugins.scalar import metadata as scalars_metadata from tensorboard.plugins import base_plugin -# By default allow at least one plugin for each upload type: Scalar, Tensor, and -# Blobs. -_SCALARS_HISTOGRAMS_AND_GRAPHS = frozenset( - ( - scalars_metadata.PLUGIN_NAME, - histograms_metadata.PLUGIN_NAME, - graphs_metadata.PLUGIN_NAME, - ) -) - - -class UploadIntentTest(tf.test.TestCase): - def testUploadIntentOneShotEmptyDirectoryFails(self): - """Test the upload intent under the one-shot mode with missing dir. - - In the case of a non-existent directoy, uploading should not - create an experiment. - """ - # Mock three places: - # 1. The uploader itself, we will inspect invocations of its methods but - # do not want to actually upload anything. - # 2. Writing to stdout, so we can inspect messages to the user. - # 3. The creation of the grpc WriteServiceChannel, which happens in the - # non dry_run execution, but we don't want to actually open a network - # communication. - mock_uploader = mock.MagicMock() - mock_stdout_write = mock.MagicMock() - with mock.patch.object( - uploader_lib, - "TensorBoardUploader", - return_value=mock_uploader, - ), mock.patch.object( - sys.stdout, "write", mock_stdout_write - ), mock.patch.object( - write_service_pb2_grpc, "TensorBoardWriterServiceStub" - ): - # Set up an UploadIntent configured with one_shot and a - # non-existent directory. - intent = uploader_subcommand.UploadIntent( - "/dev/null/non/existent/directory", one_shot=True - ) - # Execute the intent.execute method. - intent.execute(server_info_pb2.ServerInfoResponse(), None) - # Expect that there is no call to create an experiment. - self.assertEqual(mock_uploader.create_experiment.call_count, 0) - # Expect a message to the user indicating no experiment was created. 
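- # (Each entry in call_args_list is an (args, kwargs) pair, so x[0][0]
- # below is the positional string passed to sys.stdout.write.)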
- stdout_writes = [x[0][0] for x in mock_stdout_write.call_args_list] - self.assertRegex( - ",".join(stdout_writes), - ".*Exiting without creating an experiment.*", - ) - - def testUploadIntentOneShot(self): - """Test the upload intent under the one-shot mode.""" - # Mock three places: - # 1. The uploader itself, we will inspect invocations of its methods but - # do not want to actually upload anything. - # 2. Writing to stdout, so we can inspect messages to the user. - # 3. The creation of the grpc WriteServiceChannel, which happens in the - # non dry_run execution, but we don't want to actually open a network - # communication. mock_uploader = mock.MagicMock() - mock_uploader = mock.MagicMock() - mock_uploader.create_experiment = mock.MagicMock( - return_value="fake_experiment_id" - ) - mock_stdout_write = mock.MagicMock() - with mock.patch.object( - sys.stdout, "write", mock_stdout_write - ), mock.patch.object( - uploader_lib, "TensorBoardUploader", return_value=mock_uploader - ), mock.patch.object( - write_service_pb2_grpc, "TensorBoardWriterServiceStub" - ): - # Set up an UploadIntent configured with one_shot and an empty temp - # directory. - intent = uploader_subcommand.UploadIntent( - self.get_temp_dir(), one_shot=True - ) - # Execute the intent.execute method. - intent.execute(server_info_pb2.ServerInfoResponse(), None) - # Expect that there is one call to create_experiment. - self.assertEqual(mock_uploader.create_experiment.call_count, 1) - # Expect that there is one call to start_uploading. - self.assertEqual(mock_uploader.start_uploading.call_count, 1) - # Expect that ".*Done scanning logdir.*" is among the things printed. - stdout_writes = [x[0][0] for x in mock_stdout_write.call_args_list] - self.assertRegex( - ",".join(stdout_writes), - ".*experiment created.*", - ) - # Expect that the last thing written is the string "Done" and the - # experiment_id. 
- self.assertRegex(stdout_writes[-1], ".*Done.*") - self.assertRegex(stdout_writes[-1], ".*fake_experiment_id.*") - - def testUploadIntentWithExperimentUrlCallback(self): - """Test the upload intent with a callback.""" - server_info = server_info_pb2.ServerInfoResponse() - server_info.url_format.template = "https://tensorboard.dev/x/{}" - server_info.url_format.id_placeholder = "{}" - - stub = dry_run_stubs.DryRunTensorBoardWriterStub() - stub.CreateExperiment = ( - lambda req, **__: write_service_pb2.CreateExperimentResponse( - experiment_id="test_experiment_id", url="this URL is ignored" - ) - ) - - expected_url = "https://tensorboard.dev/x/test_experiment_id" - - with mock.patch.object( - dry_run_stubs, - "DryRunTensorBoardWriterStub", - wraps=lambda: stub, - ), mock.patch.object(sys.stdout, "write"): - mock_channel = mock.Mock() - mock_experiment_url_callback = mock.Mock() - intent = uploader_subcommand.UploadIntent( - self.get_temp_dir(), - dry_run=True, - one_shot=True, - experiment_url_callback=mock_experiment_url_callback, - ) - intent.execute(server_info, mock_channel) - mock_experiment_url_callback.assert_called_once_with(expected_url) - - def testUploadIntentDryRunNonOneShotInterrupted(self): - mock_server_info = mock.MagicMock() - mock_channel = mock.MagicMock() - mock_stdout_write = mock.MagicMock() - mock_uploader = mock.MagicMock() - with mock.patch.object( - mock_uploader, - "start_uploading", - side_effect=KeyboardInterrupt(), - ), mock.patch.object( - uploader_lib, "TensorBoardUploader", return_value=mock_uploader - ), mock.patch.object( - sys.stdout, "write", mock_stdout_write - ): - intent = uploader_subcommand.UploadIntent( - self.get_temp_dir(), dry_run=True, one_shot=False - ) - intent.execute(mock_server_info, mock_channel) - self.assertRegex( - mock_stdout_write.call_args_list[-1][0][0], ".*Interrupted.*" - ) - - def testUploadIntentNonDryRunNonOneShotInterrupted(self): - mock_server_info = mock.MagicMock() - mock_channel = mock.MagicMock() - mock_stdout_write = mock.MagicMock() - mock_uploader = mock.MagicMock() - with mock.patch.object( - mock_uploader, - "start_uploading", - side_effect=KeyboardInterrupt(), - ), mock.patch.object( - uploader_lib, "TensorBoardUploader", return_value=mock_uploader - ), mock.patch.object( - sys.stdout, "write", mock_stdout_write - ): - intent = uploader_subcommand.UploadIntent( - self.get_temp_dir(), dry_run=False, one_shot=False - ) - intent.execute(mock_server_info, mock_channel) - self.assertIn( - "\nInterrupted. 
View your TensorBoard at ", - mock_stdout_write.call_args_list[-1][0][0], - ) - +class IntentTest(tf.test.TestCase): def testListIntentSetsExperimentMask(self): mock_server_info = mock.MagicMock() mock_channel = mock.MagicMock() diff --git a/tensorboard/uploader/uploader_test.py b/tensorboard/uploader/uploader_test.py index db9d7d48f4..eb9de9d009 100644 --- a/tensorboard/uploader/uploader_test.py +++ b/tensorboard/uploader/uploader_test.py @@ -16,8 +16,6 @@ import itertools -import os -import re from unittest import mock import grpc @@ -25,50 +23,11 @@ import tensorflow as tf -from google.protobuf import message -from tensorboard import data_compat -from tensorboard import dataclass_compat -from tensorboard.compat.proto import tensor_shape_pb2 from tensorboard.uploader.proto import experiment_pb2 -from tensorboard.uploader.proto import scalar_pb2 -from tensorboard.uploader.proto import server_info_pb2 from tensorboard.uploader.proto import write_service_pb2 from tensorboard.uploader.proto import write_service_pb2_grpc from tensorboard.uploader import test_util -from tensorboard.uploader import upload_tracker from tensorboard.uploader import uploader as uploader_lib -from tensorboard.uploader import logdir_loader -from tensorboard.uploader import util -from tensorboard.compat.proto import event_pb2 -from tensorboard.compat.proto import graph_pb2 -from tensorboard.compat.proto import summary_pb2 -from tensorboard.compat.proto import tensor_pb2 -from tensorboard.compat.proto import types_pb2 -from tensorboard.plugins.histogram import metadata as histograms_metadata -from tensorboard.plugins.histogram import summary_v2 as histogram_v2 -from tensorboard.plugins.graph import metadata as graphs_metadata -from tensorboard.plugins.scalar import metadata as scalars_metadata -from tensorboard.plugins.scalar import summary_v2 as scalar_v2 -from tensorboard.summary import v1 as summary_v1 -from tensorboard.util import test_util as tb_test_util -from tensorboard.util import tensor_util - - -def _create_example_graph_bytes(large_attr_size): - graph_def = graph_pb2.GraphDef() - graph_def.node.add(name="alice", op="Person") - graph_def.node.add(name="bob", op="Person") - - graph_def.node[1].attr["small"].s = b"small_attr_value" - graph_def.node[1].attr["large"].s = b"l" * large_attr_size - graph_def.node.add( - name="friendship", op="Friendship", input=["alice", "bob"] - ) - return graph_def.SerializeToString() - - -class AbortUploadError(Exception): - """Exception used in testing to abort the upload process.""" def _create_mock_client(): @@ -93,1782 +52,6 @@ def _create_mock_client(): return mock_client -# By default allow at least one plugin for each upload type: Scalar, Tensor, and -# Blobs. -_SCALARS_HISTOGRAMS_AND_GRAPHS = frozenset( - ( - scalars_metadata.PLUGIN_NAME, - histograms_metadata.PLUGIN_NAME, - graphs_metadata.PLUGIN_NAME, - ) -) - -# Sentinel for `_create_*` helpers, for arguments for which we want to -# supply a default other than the `None` used by the code under test. -_USE_DEFAULT = object() - - -def _create_uploader( - writer_client=_USE_DEFAULT, - logdir=None, - max_scalar_request_size=_USE_DEFAULT, - max_blob_request_size=_USE_DEFAULT, - max_blob_size=_USE_DEFAULT, - logdir_poll_rate_limiter=_USE_DEFAULT, - rpc_rate_limiter=_USE_DEFAULT, - tensor_rpc_rate_limiter=_USE_DEFAULT, - blob_rpc_rate_limiter=_USE_DEFAULT, - name=None, - description=None, - verbosity=0, # Use 0 to minimize littering the test output. 
- one_shot=None, -): - if writer_client is _USE_DEFAULT: - writer_client = _create_mock_client() - if max_scalar_request_size is _USE_DEFAULT: - max_scalar_request_size = 128000 - if max_blob_request_size is _USE_DEFAULT: - max_blob_request_size = 128000 - if max_blob_size is _USE_DEFAULT: - max_blob_size = 12345 - if logdir_poll_rate_limiter is _USE_DEFAULT: - logdir_poll_rate_limiter = util.RateLimiter(0) - if rpc_rate_limiter is _USE_DEFAULT: - rpc_rate_limiter = util.RateLimiter(0) - if tensor_rpc_rate_limiter is _USE_DEFAULT: - tensor_rpc_rate_limiter = util.RateLimiter(0) - if blob_rpc_rate_limiter is _USE_DEFAULT: - blob_rpc_rate_limiter = util.RateLimiter(0) - - upload_limits = server_info_pb2.UploadLimits( - max_scalar_request_size=max_scalar_request_size, - max_tensor_request_size=128000, - max_tensor_point_size=11111, - max_blob_request_size=max_blob_request_size, - max_blob_size=max_blob_size, - ) - - return uploader_lib.TensorBoardUploader( - writer_client, - logdir, - allowed_plugins=_SCALARS_HISTOGRAMS_AND_GRAPHS, - upload_limits=upload_limits, - logdir_poll_rate_limiter=logdir_poll_rate_limiter, - rpc_rate_limiter=rpc_rate_limiter, - tensor_rpc_rate_limiter=tensor_rpc_rate_limiter, - blob_rpc_rate_limiter=blob_rpc_rate_limiter, - name=name, - description=description, - verbosity=verbosity, - one_shot=one_shot, - ) - - -def _create_request_sender( - experiment_id=None, - api=None, - allowed_plugins=_USE_DEFAULT, -): - if api is _USE_DEFAULT: - api = _create_mock_client() - if allowed_plugins is _USE_DEFAULT: - allowed_plugins = _SCALARS_HISTOGRAMS_AND_GRAPHS - - upload_limits = server_info_pb2.UploadLimits( - max_scalar_request_size=128000, - max_tensor_request_size=128000, - max_tensor_point_size=11111, - max_blob_size=12345, - ) - - rpc_rate_limiter = util.RateLimiter(0) - tensor_rpc_rate_limiter = util.RateLimiter(0) - blob_rpc_rate_limiter = util.RateLimiter(0) - - return uploader_lib._BatchedRequestSender( - experiment_id=experiment_id, - api=api, - allowed_plugins=allowed_plugins, - upload_limits=upload_limits, - rpc_rate_limiter=rpc_rate_limiter, - tensor_rpc_rate_limiter=tensor_rpc_rate_limiter, - blob_rpc_rate_limiter=blob_rpc_rate_limiter, - tracker=upload_tracker.UploadTracker(verbosity=0), - ) - - -def _create_scalar_request_sender( - experiment_id=None, - api=_USE_DEFAULT, - max_request_size=_USE_DEFAULT, - tracker=None, -): - if api is _USE_DEFAULT: - api = _create_mock_client() - if max_request_size is _USE_DEFAULT: - max_request_size = 128000 - return uploader_lib._ScalarBatchedRequestSender( - experiment_id=experiment_id, - api=api, - rpc_rate_limiter=util.RateLimiter(0), - max_request_size=max_request_size, - tracker=tracker or upload_tracker.UploadTracker(verbosity=0), - ) - - -def _create_tensor_request_sender( - experiment_id=None, - api=_USE_DEFAULT, - max_request_size=_USE_DEFAULT, - max_tensor_point_size=_USE_DEFAULT, - tracker=None, -): - if api is _USE_DEFAULT: - api = _create_mock_client() - if max_request_size is _USE_DEFAULT: - max_request_size = 128000 - if max_tensor_point_size is _USE_DEFAULT: - max_tensor_point_size = 11111 - return uploader_lib._TensorBatchedRequestSender( - experiment_id=experiment_id, - api=api, - rpc_rate_limiter=util.RateLimiter(0), - max_request_size=max_request_size, - max_tensor_point_size=max_tensor_point_size, - tracker=tracker or upload_tracker.UploadTracker(verbosity=0), - ) - - -class TensorboardUploaderTest(tf.test.TestCase): - def test_create_experiment(self): - logdir = "/logs/foo" - uploader = 
_create_uploader(_create_mock_client(), logdir) - eid = uploader.create_experiment() - self.assertEqual(eid, "123") - - def test_create_experiment_with_name(self): - logdir = "/logs/foo" - mock_client = _create_mock_client() - new_name = "This is the new name" - uploader = _create_uploader(mock_client, logdir, name=new_name) - eid = uploader.create_experiment() - self.assertEqual(eid, "123") - mock_client.CreateExperiment.assert_called_once() - (args, _) = mock_client.CreateExperiment.call_args - - expected_request = write_service_pb2.CreateExperimentRequest( - name=new_name, - ) - self.assertEqual(args[0], expected_request) - - def test_create_experiment_with_description(self): - logdir = "/logs/foo" - mock_client = _create_mock_client() - new_description = """ - **description**" - may have "strange" unicode chars 🌴 \\/<> - """ - uploader = _create_uploader( - mock_client, logdir, description=new_description - ) - eid = uploader.create_experiment() - self.assertEqual(eid, "123") - mock_client.CreateExperiment.assert_called_once() - (args, _) = mock_client.CreateExperiment.call_args - - expected_request = write_service_pb2.CreateExperimentRequest( - description=new_description, - ) - self.assertEqual(args[0], expected_request) - - def test_create_experiment_with_all_metadata(self): - logdir = "/logs/foo" - mock_client = _create_mock_client() - new_description = """ - **description**" - may have "strange" unicode chars 🌴 \\/<> - """ - new_name = "This is a cool name." - uploader = _create_uploader( - mock_client, logdir, name=new_name, description=new_description - ) - eid = uploader.create_experiment() - self.assertEqual(eid, "123") - mock_client.CreateExperiment.assert_called_once() - (args, _) = mock_client.CreateExperiment.call_args - - expected_request = write_service_pb2.CreateExperimentRequest( - name=new_name, - description=new_description, - ) - self.assertEqual(args[0], expected_request) - - def test_start_uploading_without_create_experiment_fails(self): - mock_client = _create_mock_client() - uploader = _create_uploader(mock_client, "/logs/foo") - with self.assertRaisesRegex(RuntimeError, "call create_experiment()"): - uploader.start_uploading() - - def test_start_uploading_scalars(self): - mock_client = _create_mock_client() - mock_rate_limiter = mock.create_autospec(util.RateLimiter) - mock_tensor_rate_limiter = mock.create_autospec(util.RateLimiter) - mock_blob_rate_limiter = mock.create_autospec(util.RateLimiter) - mock_tracker = mock.MagicMock() - with mock.patch.object( - upload_tracker, "UploadTracker", return_value=mock_tracker - ): - uploader = _create_uploader( - mock_client, - "/logs/foo", - # Send each Event below in a separate WriteScalarRequest - max_scalar_request_size=100, - rpc_rate_limiter=mock_rate_limiter, - tensor_rpc_rate_limiter=mock_tensor_rate_limiter, - blob_rpc_rate_limiter=mock_blob_rate_limiter, - verbosity=1, # In order to test the upload tracker. 
- ) - uploader.create_experiment() - - def scalar_event(tag, value): - return event_pb2.Event(summary=scalar_v2.scalar_pb(tag, value)) - - mock_logdir_loader = mock.create_autospec(logdir_loader.LogdirLoader) - mock_logdir_loader.get_run_events.side_effect = [ - { - "run 1": _apply_compat( - [scalar_event("1.1", 5.0), scalar_event("1.2", 5.0)] - ), - "run 2": _apply_compat( - [scalar_event("2.1", 5.0), scalar_event("2.2", 5.0)] - ), - }, - { - "run 3": _apply_compat( - [scalar_event("3.1", 5.0), scalar_event("3.2", 5.0)] - ), - "run 4": _apply_compat( - [scalar_event("4.1", 5.0), scalar_event("4.2", 5.0)] - ), - "run 5": _apply_compat( - [scalar_event("5.1", 5.0), scalar_event("5.2", 5.0)] - ), - }, - AbortUploadError, - ] - - with mock.patch.object( - uploader, "_logdir_loader", mock_logdir_loader - ), self.assertRaises(AbortUploadError): - uploader.start_uploading() - self.assertEqual(4 + 6, mock_client.WriteScalar.call_count) - self.assertEqual(4 + 6, mock_rate_limiter.tick.call_count) - self.assertEqual(0, mock_tensor_rate_limiter.tick.call_count) - self.assertEqual(0, mock_blob_rate_limiter.tick.call_count) - - # Check upload tracker calls. - self.assertEqual(mock_tracker.send_tracker.call_count, 2) - self.assertEqual(mock_tracker.scalars_tracker.call_count, 10) - self.assertLen(mock_tracker.scalars_tracker.call_args[0], 1) - self.assertEqual(mock_tracker.tensors_tracker.call_count, 0) - self.assertEqual(mock_tracker.blob_tracker.call_count, 0) - - def test_start_uploading_scalars_one_shot(self): - """Check that one-shot uploading stops without AbortUploadError.""" - mock_client = _create_mock_client() - mock_rate_limiter = mock.create_autospec(util.RateLimiter) - mock_tensor_rate_limiter = mock.create_autospec(util.RateLimiter) - mock_blob_rate_limiter = mock.create_autospec(util.RateLimiter) - mock_tracker = mock.MagicMock() - with mock.patch.object( - upload_tracker, "UploadTracker", return_value=mock_tracker - ): - uploader = _create_uploader( - mock_client, - "/logs/foo", - # Send each Event below in a separate WriteScalarRequest - max_scalar_request_size=100, - rpc_rate_limiter=mock_rate_limiter, - tensor_rpc_rate_limiter=mock_tensor_rate_limiter, - blob_rpc_rate_limiter=mock_blob_rate_limiter, - verbosity=1, # In order to test the upload tracker. - one_shot=True, - ) - uploader.create_experiment() - - def scalar_event(tag, value): - return event_pb2.Event(summary=scalar_v2.scalar_pb(tag, value)) - - mock_logdir_loader = mock.create_autospec(logdir_loader.LogdirLoader) - mock_logdir_loader.get_run_events.side_effect = [ - { - "run 1": _apply_compat( - [scalar_event("1.1", 5.0), scalar_event("1.2", 5.0)] - ), - "run 2": _apply_compat( - [scalar_event("2.1", 5.0), scalar_event("2.2", 5.0)] - ), - }, - # Note the lack of AbortUploadError here. - ] - - with mock.patch.object(uploader, "_logdir_loader", mock_logdir_loader): - uploader.start_uploading() - - self.assertEqual(4, mock_client.WriteScalar.call_count) - self.assertEqual(4, mock_rate_limiter.tick.call_count) - self.assertEqual(0, mock_tensor_rate_limiter.tick.call_count) - self.assertEqual(0, mock_blob_rate_limiter.tick.call_count) - - # Check upload tracker calls. 
- self.assertEqual(mock_tracker.send_tracker.call_count, 1) - self.assertEqual(mock_tracker.scalars_tracker.call_count, 4) - self.assertLen(mock_tracker.scalars_tracker.call_args[0], 1) - self.assertEqual(mock_tracker.tensors_tracker.call_count, 0) - self.assertEqual(mock_tracker.blob_tracker.call_count, 0) - - def test_start_uploading_tensors(self): - mock_client = _create_mock_client() - mock_rate_limiter = mock.create_autospec(util.RateLimiter) - mock_tensor_rate_limiter = mock.create_autospec(util.RateLimiter) - mock_blob_rate_limiter = mock.create_autospec(util.RateLimiter) - mock_tracker = mock.MagicMock() - with mock.patch.object( - upload_tracker, "UploadTracker", return_value=mock_tracker - ): - uploader = _create_uploader( - mock_client, - "/logs/foo", - rpc_rate_limiter=mock_rate_limiter, - tensor_rpc_rate_limiter=mock_tensor_rate_limiter, - blob_rpc_rate_limiter=mock_blob_rate_limiter, - verbosity=1, # In order to test the upload tracker. - ) - uploader.create_experiment() - - def tensor_event(tag, value): - return event_pb2.Event( - summary=histogram_v2.histogram_pb(tag, value) - ) - - mock_logdir_loader = mock.create_autospec(logdir_loader.LogdirLoader) - mock_logdir_loader.get_run_events.side_effect = [ - { - "run 1": _apply_compat( - [tensor_event("1.1", [5.0]), tensor_event("1.2", [5.0])] - ), - }, - AbortUploadError, - ] - - with mock.patch.object( - uploader, "_logdir_loader", mock_logdir_loader - ), self.assertRaises(AbortUploadError): - uploader.start_uploading() - self.assertEqual(1, mock_client.WriteTensor.call_count) - self.assertEqual(0, mock_rate_limiter.tick.call_count) - self.assertEqual(1, mock_tensor_rate_limiter.tick.call_count) - self.assertEqual(0, mock_blob_rate_limiter.tick.call_count) - - # Check upload tracker calls. - self.assertEqual(mock_tracker.send_tracker.call_count, 1) - self.assertEqual(mock_tracker.scalars_tracker.call_count, 0) - tensors_tracker = mock_tracker.tensors_tracker - self.assertEqual(tensors_tracker.call_count, 1) - self.assertLen(tensors_tracker.call_args[0], 4) - self.assertEqual(tensors_tracker.call_args[0][0], 2) # num_tensors - self.assertEqual( - tensors_tracker.call_args[0][1], 0 - ) # num_tensors_skipped - # tensor_bytes: avoid asserting the exact value as it's hard to reason about. - self.assertGreater(tensors_tracker.call_args[0][2], 0) - self.assertEqual( - tensors_tracker.call_args[0][3], 0 - ) # tensor_bytes_skipped - self.assertEqual(mock_tracker.blob_tracker.call_count, 0) - - def test_start_uploading_graphs(self): - mock_client = _create_mock_client() - mock_rate_limiter = mock.create_autospec(util.RateLimiter) - mock_tensor_rate_limiter = mock.create_autospec(util.RateLimiter) - mock_blob_rate_limiter = mock.create_autospec(util.RateLimiter) - mock_tracker = mock.MagicMock() - with mock.patch.object( - upload_tracker, "UploadTracker", return_value=mock_tracker - ): - uploader = _create_uploader( - mock_client, - "/logs/foo", - # Verify behavior with lots of small chunks - max_blob_request_size=100, - rpc_rate_limiter=mock_rate_limiter, - tensor_rpc_rate_limiter=mock_tensor_rate_limiter, - blob_rpc_rate_limiter=mock_blob_rate_limiter, - verbosity=1, # In order to test tracker. - ) - uploader.create_experiment() - - # Of course a real Event stream will never produce the same Event twice, - # but is this test context it's fine to reuse this one. 
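- # (_apply_compat rewrites a raw graph_def event into a graphs-plugin
- # blob-sequence summary, which is why this test asserts on WriteBlob
- # calls rather than WriteScalar or WriteTensor.)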
- graph_event = event_pb2.Event( - graph_def=_create_example_graph_bytes(950) - ) - expected_graph_def = graph_pb2.GraphDef.FromString( - graph_event.graph_def - ) - mock_logdir_loader = mock.create_autospec(logdir_loader.LogdirLoader) - mock_logdir_loader.get_run_events.side_effect = [ - { - "run 1": _apply_compat([graph_event, graph_event]), - "run 2": _apply_compat([graph_event, graph_event]), - }, - { - "run 3": _apply_compat([graph_event, graph_event]), - "run 4": _apply_compat([graph_event, graph_event]), - "run 5": _apply_compat([graph_event, graph_event]), - }, - AbortUploadError, - ] - - with mock.patch.object( - uploader, "_logdir_loader", mock_logdir_loader - ), self.assertRaises(AbortUploadError): - uploader.start_uploading() - self.assertEqual(1, mock_client.CreateExperiment.call_count) - self.assertEqual(10, mock_client.WriteBlob.call_count) - for (i, call) in enumerate(mock_client.WriteBlob.call_args_list): - requests = list(call[0][0]) - data = b"".join(r.data for r in requests) - actual_graph_def = graph_pb2.GraphDef.FromString(data) - self.assertProtoEquals(expected_graph_def, actual_graph_def) - self.assertEqual( - set(r.blob_sequence_id for r in requests), - {"blob%d" % i}, - ) - self.assertEqual(0, mock_rate_limiter.tick.call_count) - self.assertEqual(0, mock_tensor_rate_limiter.tick.call_count) - self.assertEqual(10, mock_blob_rate_limiter.tick.call_count) - - # Check upload tracker calls. - self.assertEqual(mock_tracker.send_tracker.call_count, 2) - self.assertEqual(mock_tracker.scalars_tracker.call_count, 0) - self.assertEqual(mock_tracker.tensors_tracker.call_count, 0) - self.assertEqual(mock_tracker.blob_tracker.call_count, 10) - self.assertLen(mock_tracker.blob_tracker.call_args[0], 1) - self.assertGreater(mock_tracker.blob_tracker.call_args[0][0], 0) - - def test_upload_skip_large_blob(self): - mock_client = _create_mock_client() - mock_rate_limiter = mock.create_autospec(util.RateLimiter) - mock_blob_rate_limiter = mock.create_autospec(util.RateLimiter) - uploader = _create_uploader( - mock_client, - "/logs/foo", - # Verify behavior with lots of small chunks - max_blob_request_size=100, - max_blob_size=100, - rpc_rate_limiter=mock_rate_limiter, - blob_rpc_rate_limiter=mock_blob_rate_limiter, - ) - uploader.create_experiment() - - graph_event = event_pb2.Event( - graph_def=_create_example_graph_bytes(950) - ) - - mock_logdir_loader = mock.create_autospec(logdir_loader.LogdirLoader) - mock_logdir_loader.get_run_events.side_effect = [ - {"run 1": _apply_compat([graph_event])}, - AbortUploadError, - ] - - with mock.patch.object( - uploader, "_logdir_loader", mock_logdir_loader - ), self.assertRaises(AbortUploadError): - uploader.start_uploading() - self.assertEqual(1, mock_client.CreateExperiment.call_count) - self.assertEqual(0, mock_client.WriteBlob.call_count) - self.assertEqual(0, mock_rate_limiter.tick.call_count) - self.assertEqual(1, mock_blob_rate_limiter.tick.call_count) - - def test_filter_graphs(self): - # Three graphs: one short, one long, one corrupt. 
- bytes_0 = _create_example_graph_bytes(123) - bytes_1 = _create_example_graph_bytes(9999) - # invalid (truncated) proto: length-delimited field 1 (0x0a) of - # length 0x7f specified, but only len("bogus") = 5 bytes given - # - bytes_2 = b"\x0a\x7fbogus" - - logdir = self.get_temp_dir() - for (i, b) in enumerate([bytes_0, bytes_1, bytes_2]): - run_dir = os.path.join(logdir, "run_%04d" % i) - event = event_pb2.Event(step=0, wall_time=123 * i, graph_def=b) - with tb_test_util.FileWriter(run_dir) as writer: - writer.add_event(event) - - limiter = mock.create_autospec(util.RateLimiter) - limiter.tick.side_effect = [None, AbortUploadError] - mock_client = _create_mock_client() - uploader = _create_uploader( - mock_client, - logdir, - logdir_poll_rate_limiter=limiter, - ) - uploader.create_experiment() - - with self.assertRaises(AbortUploadError): - uploader.start_uploading() - - actual_blobs = [] - for call in mock_client.WriteBlob.call_args_list: - requests = call[0][0] - actual_blobs.append(b"".join(r.data for r in requests)) - - actual_graph_defs = [] - for blob in actual_blobs: - try: - actual_graph_defs.append(graph_pb2.GraphDef.FromString(blob)) - except message.DecodeError: - actual_graph_defs.append(None) - - with self.subTest("graphs with small attr values should be unchanged"): - expected_graph_def_0 = graph_pb2.GraphDef.FromString(bytes_0) - self.assertEqual(actual_graph_defs[0], expected_graph_def_0) - - with self.subTest("large attr values should be filtered out"): - expected_graph_def_1 = graph_pb2.GraphDef.FromString(bytes_1) - del expected_graph_def_1.node[1].attr["large"] - expected_graph_def_1.node[1].attr["_too_large_attrs"].list.s.append( - b"large" - ) - requests = list(mock_client.WriteBlob.call_args[0][0]) - self.assertEqual(actual_graph_defs[1], expected_graph_def_1) - - with self.subTest("corrupt graphs should be skipped"): - self.assertLen(actual_blobs, 2) - - def test_upload_server_error(self): - mock_client = _create_mock_client() - mock_rate_limiter = mock.create_autospec(util.RateLimiter) - mock_blob_rate_limiter = mock.create_autospec(util.RateLimiter) - uploader = _create_uploader( - mock_client, - "/logs/foo", - rpc_rate_limiter=mock_rate_limiter, - blob_rpc_rate_limiter=mock_blob_rate_limiter, - ) - uploader.create_experiment() - - # Of course a real Event stream will never produce the same Event twice, - # but is this test context it's fine to reuse this one. - graph_event = event_pb2.Event( - graph_def=_create_example_graph_bytes(950) - ) - - mock_logdir_loader = mock.create_autospec(logdir_loader.LogdirLoader) - mock_logdir_loader.get_run_events.side_effect = [ - {"run 1": _apply_compat([graph_event])}, - {"run 1": _apply_compat([graph_event])}, - AbortUploadError, - ] - - mock_client.WriteBlob.side_effect = [ - [write_service_pb2.WriteBlobResponse()], - test_util.grpc_error(grpc.StatusCode.INTERNAL, "nope"), - ] - - # This demonstrates that the INTERNAL error is NOT handled, so the - # uploader will die if this happens. 
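- # (Contrast with test_upload_same_graph_twice below, where an
- # ALREADY_EXISTS status is swallowed and the upload loop keeps going.)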
- with mock.patch.object( - uploader, "_logdir_loader", mock_logdir_loader - ), self.assertRaises(grpc.RpcError): - uploader.start_uploading() - self.assertEqual(1, mock_client.CreateExperiment.call_count) - self.assertEqual(2, mock_client.WriteBlob.call_count) - self.assertEqual(0, mock_rate_limiter.tick.call_count) - self.assertEqual(2, mock_blob_rate_limiter.tick.call_count) - - def test_upload_same_graph_twice(self): - mock_client = _create_mock_client() - mock_rate_limiter = mock.create_autospec(util.RateLimiter) - mock_blob_rate_limiter = mock.create_autospec(util.RateLimiter) - uploader = _create_uploader( - mock_client, - "/logs/foo", - rpc_rate_limiter=mock_rate_limiter, - blob_rpc_rate_limiter=mock_blob_rate_limiter, - ) - uploader.create_experiment() - - graph_event = event_pb2.Event( - graph_def=_create_example_graph_bytes(950) - ) - - mock_logdir_loader = mock.create_autospec(logdir_loader.LogdirLoader) - mock_logdir_loader.get_run_events.side_effect = [ - {"run 1": _apply_compat([graph_event])}, - {"run 1": _apply_compat([graph_event])}, - AbortUploadError, - ] - - mock_client.WriteBlob.side_effect = [ - [write_service_pb2.WriteBlobResponse()], - test_util.grpc_error(grpc.StatusCode.ALREADY_EXISTS, "nope"), - ] - - # This demonstrates that the ALREADY_EXISTS error is handled gracefully. - with mock.patch.object( - uploader, "_logdir_loader", mock_logdir_loader - ), self.assertRaises(AbortUploadError): - uploader.start_uploading() - self.assertEqual(1, mock_client.CreateExperiment.call_count) - self.assertEqual(2, mock_client.WriteBlob.call_count) - self.assertEqual(0, mock_rate_limiter.tick.call_count) - self.assertEqual(2, mock_blob_rate_limiter.tick.call_count) - - def test_upload_empty_logdir(self): - logdir = self.get_temp_dir() - mock_client = _create_mock_client() - uploader = _create_uploader(mock_client, logdir) - uploader.create_experiment() - uploader._upload_once() - mock_client.WriteScalar.assert_not_called() - - def test_upload_polls_slowly_once_done(self): - class Success(Exception): - pass - - mock_rate_limiter = mock.create_autospec(util.RateLimiter) - upload_call_count = 0 - - def mock_upload_once(): - nonlocal upload_call_count - upload_call_count += 1 - tick_count = mock_rate_limiter.tick.call_count - self.assertEqual(tick_count, upload_call_count) - if tick_count >= 3: - raise Success() - - uploader = _create_uploader( - logdir=self.get_temp_dir(), - logdir_poll_rate_limiter=mock_rate_limiter, - ) - uploader._upload_once = mock_upload_once - - uploader.create_experiment() - with self.assertRaises(Success): - uploader.start_uploading() - - def test_upload_swallows_rpc_failure(self): - logdir = self.get_temp_dir() - with tb_test_util.FileWriter(logdir) as writer: - writer.add_test_summary("foo") - mock_client = _create_mock_client() - uploader = _create_uploader(mock_client, logdir) - uploader.create_experiment() - error = test_util.grpc_error(grpc.StatusCode.INTERNAL, "Failure") - mock_client.WriteScalar.side_effect = error - uploader._upload_once() - mock_client.WriteScalar.assert_called_once() - - def test_upload_full_logdir(self): - logdir = self.get_temp_dir() - mock_client = _create_mock_client() - uploader = _create_uploader(mock_client, logdir) - uploader.create_experiment() - - # Convenience helpers for constructing expected requests. 
- run = write_service_pb2.WriteScalarRequest.Run - tag = write_service_pb2.WriteScalarRequest.Tag - point = scalar_pb2.ScalarPoint - - # First round - writer = tb_test_util.FileWriter(logdir) - writer.add_test_summary("foo", simple_value=5.0, step=1) - writer.add_test_summary("foo", simple_value=6.0, step=2) - writer.add_test_summary("foo", simple_value=7.0, step=3) - writer.add_test_summary("bar", simple_value=8.0, step=3) - writer.flush() - writer_a = tb_test_util.FileWriter(os.path.join(logdir, "a")) - writer_a.add_test_summary("qux", simple_value=9.0, step=2) - writer_a.flush() - uploader._upload_once() - self.assertEqual(1, mock_client.WriteScalar.call_count) - request1 = mock_client.WriteScalar.call_args[0][0] - _clear_wall_times(request1) - expected_request1 = write_service_pb2.WriteScalarRequest( - experiment_id="123", - runs=[ - run( - name=".", - tags=[ - tag( - name="foo", - metadata=test_util.scalar_metadata("foo"), - points=[ - point(step=1, value=5.0), - point(step=2, value=6.0), - point(step=3, value=7.0), - ], - ), - tag( - name="bar", - metadata=test_util.scalar_metadata("bar"), - points=[point(step=3, value=8.0)], - ), - ], - ), - run( - name="a", - tags=[ - tag( - name="qux", - metadata=test_util.scalar_metadata("qux"), - points=[point(step=2, value=9.0)], - ) - ], - ), - ], - ) - self.assertProtoEquals(expected_request1, request1) - mock_client.WriteScalar.reset_mock() - - # Second round - writer.add_test_summary("foo", simple_value=10.0, step=5) - writer.add_test_summary("baz", simple_value=11.0, step=1) - writer.flush() - writer_b = tb_test_util.FileWriter(os.path.join(logdir, "b")) - writer_b.add_test_summary("xyz", simple_value=12.0, step=1) - writer_b.flush() - uploader._upload_once() - self.assertEqual(1, mock_client.WriteScalar.call_count) - request2 = mock_client.WriteScalar.call_args[0][0] - _clear_wall_times(request2) - expected_request2 = write_service_pb2.WriteScalarRequest( - experiment_id="123", - runs=[ - run( - name=".", - tags=[ - tag( - name="foo", - metadata=test_util.scalar_metadata("foo"), - points=[point(step=5, value=10.0)], - ), - tag( - name="baz", - metadata=test_util.scalar_metadata("baz"), - points=[point(step=1, value=11.0)], - ), - ], - ), - run( - name="b", - tags=[ - tag( - name="xyz", - metadata=test_util.scalar_metadata("xyz"), - points=[point(step=1, value=12.0)], - ) - ], - ), - ], - ) - self.assertProtoEquals(expected_request2, request2) - mock_client.WriteScalar.reset_mock() - - # Empty third round - uploader._upload_once() - mock_client.WriteScalar.assert_not_called() - - def test_verbosity_zero_creates_upload_tracker_with_verbosity_zero(self): - mock_client = _create_mock_client() - mock_tracker = mock.MagicMock() - with mock.patch.object( - upload_tracker, "UploadTracker", return_value=mock_tracker - ) as mock_constructor: - uploader = _create_uploader( - mock_client, - "/logs/foo", - verbosity=0, # Explicitly set verbosity to 0. 
- ) - uploader.create_experiment() - - def scalar_event(tag, value): - return event_pb2.Event(summary=scalar_v2.scalar_pb(tag, value)) - - mock_logdir_loader = mock.create_autospec(logdir_loader.LogdirLoader) - mock_logdir_loader.get_run_events.side_effect = [ - { - "run 1": _apply_compat( - [scalar_event("1.1", 5.0), scalar_event("1.2", 5.0)] - ), - }, - AbortUploadError, - ] - - with mock.patch.object( - uploader, "_logdir_loader", mock_logdir_loader - ), self.assertRaises(AbortUploadError): - uploader.start_uploading() - - self.assertEqual(mock_constructor.call_count, 1) - self.assertEqual( - mock_constructor.call_args[1], {"verbosity": 0, "one_shot": False} - ) - self.assertEqual(mock_tracker.scalars_tracker.call_count, 1) - - -class BatchedRequestSenderTest(tf.test.TestCase): - def _populate_run_from_events( - self, scalar_run, tensor_run, events, allowed_plugins=_USE_DEFAULT - ): - mock_client = _create_mock_client() - builder = _create_request_sender( - experiment_id="123", - api=mock_client, - allowed_plugins=allowed_plugins, - ) - builder.send_requests({"": _apply_compat(events)}) - scalar_requests = [ - c[0][0] for c in mock_client.WriteScalar.call_args_list - ] - if scalar_requests: - self.assertLen(scalar_requests, 1) - self.assertLen(scalar_requests[0].runs, 1) - scalar_run.MergeFrom(scalar_requests[0].runs[0]) - tensor_requests = [ - c[0][0] for c in mock_client.WriteTensor.call_args_list - ] - if tensor_requests: - self.assertLen(tensor_requests, 1) - self.assertLen(tensor_requests[0].runs, 1) - tensor_run.MergeFrom(tensor_requests[0].runs[0]) - - def test_empty_events(self): - scalar_run = write_service_pb2.WriteScalarRequest.Run() - tensor_run = write_service_pb2.WriteTensorRequest.Run() - self._populate_run_from_events(scalar_run, tensor_run, []) - self.assertProtoEquals( - scalar_run, write_service_pb2.WriteScalarRequest.Run() - ) - self.assertProtoEquals( - tensor_run, write_service_pb2.WriteTensorRequest.Run() - ) - - def test_scalar_and_tensor_events(self): - events = [ - event_pb2.Event(summary=scalar_v2.scalar_pb("scalar1", 5.0)), - event_pb2.Event(summary=scalar_v2.scalar_pb("scalar2", 5.0)), - event_pb2.Event( - summary=histogram_v2.histogram_pb("histogram", [5.0]) - ), - event_pb2.Event( - summary=histogram_v2.histogram_pb("histogram", [6.0]) - ), - ] - scalar_run = write_service_pb2.WriteScalarRequest.Run() - tensor_run = write_service_pb2.WriteTensorRequest.Run() - self._populate_run_from_events(scalar_run, tensor_run, events) - scalar_tag_counts = _extract_tag_counts(scalar_run) - self.assertEqual(scalar_tag_counts, {"scalar1": 1, "scalar2": 1}) - tensor_tag_counts = _extract_tag_counts(tensor_run) - self.assertEqual(tensor_tag_counts, {"histogram": 2}) - - def test_skips_non_scalar_and_non_tensor_events(self): - events = [ - event_pb2.Event(summary=scalar_v2.scalar_pb("scalar1", 5.0)), - event_pb2.Event(file_version="brain.Event:2"), - event_pb2.Event( - summary=histogram_v2.histogram_pb("histogram", [5.0]) - ), - ] - scalar_run = write_service_pb2.WriteScalarRequest.Run() - tensor_run = write_service_pb2.WriteTensorRequest.Run() - self._populate_run_from_events(scalar_run, tensor_run, events) - scalar_tag_counts = _extract_tag_counts(scalar_run) - self.assertEqual(scalar_tag_counts, {"scalar1": 1}) - tensor_tag_counts = _extract_tag_counts(tensor_run) - self.assertEqual(tensor_tag_counts, {"histogram": 1}) - - def test_skips_non_scalar_events_in_scalar_time_series(self): - events = [ - event_pb2.Event(file_version="brain.Event:2"), - 
event_pb2.Event(summary=scalar_v2.scalar_pb("scalar1", 5.0)), - event_pb2.Event(summary=scalar_v2.scalar_pb("scalar2", 5.0)), - event_pb2.Event( - summary=histogram_v2.histogram_pb("scalar2", [5.0]) - ), - ] - scalar_run = write_service_pb2.WriteScalarRequest.Run() - tensor_run = write_service_pb2.WriteTensorRequest.Run() - self._populate_run_from_events(scalar_run, tensor_run, events) - scalar_tag_counts = _extract_tag_counts(scalar_run) - self.assertEqual(scalar_tag_counts, {"scalar1": 1, "scalar2": 1}) - tensor_tag_counts = _extract_tag_counts(tensor_run) - self.assertEqual(tensor_tag_counts, {}) - - def test_skips_events_from_disallowed_plugins(self): - event = event_pb2.Event( - step=1, wall_time=123.456, summary=scalar_v2.scalar_pb("foo", 5.0) - ) - scalar_run = write_service_pb2.WriteScalarRequest.Run() - tensor_run = write_service_pb2.WriteTensorRequest.Run() - self._populate_run_from_events( - scalar_run, - tensor_run, - [event], - allowed_plugins=frozenset("not-scalars"), - ) - expected_scalar_run = write_service_pb2.WriteScalarRequest.Run() - self.assertProtoEquals(scalar_run, expected_scalar_run) - expected_tensor_run = write_service_pb2.WriteTensorRequest.Run() - self.assertProtoEquals(tensor_run, expected_tensor_run) - - def test_remembers_first_metadata_in_time_series(self): - scalar_1 = event_pb2.Event(summary=scalar_v2.scalar_pb("loss", 4.0)) - scalar_2 = event_pb2.Event(summary=scalar_v2.scalar_pb("loss", 3.0)) - scalar_2.summary.value[0].ClearField("metadata") - events = [ - event_pb2.Event(file_version="brain.Event:2"), - scalar_1, - scalar_2, - ] - scalar_run = write_service_pb2.WriteScalarRequest.Run() - tensor_run = write_service_pb2.WriteTensorRequest.Run() - self._populate_run_from_events(scalar_run, tensor_run, events) - scalar_tag_counts = _extract_tag_counts(scalar_run) - self.assertEqual(scalar_tag_counts, {"loss": 2}) - - def test_expands_multiple_values_in_event(self): - event = event_pb2.Event(step=1, wall_time=123.456) - event.summary.value.add(tag="foo", simple_value=1.0) - event.summary.value.add(tag="foo", simple_value=2.0) - event.summary.value.add(tag="foo", simple_value=3.0) - scalar_run = write_service_pb2.WriteScalarRequest.Run() - tensor_run = write_service_pb2.WriteTensorRequest.Run() - self._populate_run_from_events(scalar_run, tensor_run, [event]) - expected_scalar_run = write_service_pb2.WriteScalarRequest.Run() - foo_tag = expected_scalar_run.tags.add() - foo_tag.name = "foo" - foo_tag.metadata.display_name = "foo" - foo_tag.metadata.plugin_data.plugin_name = "scalars" - foo_tag.metadata.data_class = summary_pb2.DATA_CLASS_SCALAR - foo_tag.points.add( - step=1, wall_time=test_util.timestamp_pb(123456000000), value=1.0 - ) - foo_tag.points.add( - step=1, wall_time=test_util.timestamp_pb(123456000000), value=2.0 - ) - foo_tag.points.add( - step=1, wall_time=test_util.timestamp_pb(123456000000), value=3.0 - ) - self.assertProtoEquals(scalar_run, expected_scalar_run) - - -class ScalarBatchedRequestSenderTest(tf.test.TestCase): - def _add_events(self, sender, run_name, events): - for event in events: - for value in event.summary.value: - sender.add_event(run_name, event, value, value.metadata) - - def _add_events_and_flush(self, events): - mock_client = _create_mock_client() - sender = _create_scalar_request_sender( - experiment_id="123", - api=mock_client, - ) - self._add_events(sender, "", events) - sender.flush() - - requests = [c[0][0] for c in mock_client.WriteScalar.call_args_list] - self.assertLen(requests, 1) - 
self.assertLen(requests[0].runs, 1) - return requests[0].runs[0] - - def test_aggregation_by_tag(self): - def make_event(step, wall_time, tag, value): - return event_pb2.Event( - step=step, - wall_time=wall_time, - summary=scalar_v2.scalar_pb(tag, value), - ) - - events = [ - make_event(1, 1.0, "one", 11.0), - make_event(1, 2.0, "two", 22.0), - make_event(2, 3.0, "one", 33.0), - make_event(2, 4.0, "two", 44.0), - make_event( - 1, 5.0, "one", 55.0 - ), # Should preserve duplicate step=1. - make_event(1, 6.0, "three", 66.0), - ] - run_proto = self._add_events_and_flush(events) - tag_data = { - tag.name: [ - (p.step, p.wall_time.ToSeconds(), p.value) for p in tag.points - ] - for tag in run_proto.tags - } - self.assertEqual( - tag_data, - { - "one": [(1, 1.0, 11.0), (2, 3.0, 33.0), (1, 5.0, 55.0)], - "two": [(1, 2.0, 22.0), (2, 4.0, 44.0)], - "three": [(1, 6.0, 66.0)], - }, - ) - - def test_v1_summary(self): - event = event_pb2.Event(step=1, wall_time=123.456) - event.summary.value.add(tag="foo", simple_value=5.0) - run_proto = self._add_events_and_flush(_apply_compat([event])) - expected_run_proto = write_service_pb2.WriteScalarRequest.Run() - foo_tag = expected_run_proto.tags.add() - foo_tag.name = "foo" - foo_tag.metadata.display_name = "foo" - foo_tag.metadata.plugin_data.plugin_name = "scalars" - foo_tag.metadata.data_class = summary_pb2.DATA_CLASS_SCALAR - foo_tag.points.add( - step=1, wall_time=test_util.timestamp_pb(123456000000), value=5.0 - ) - self.assertProtoEquals(run_proto, expected_run_proto) - - def test_v1_summary_tb_summary(self): - tf_summary = summary_v1.scalar_pb("foo", 5.0) - tb_summary = summary_pb2.Summary.FromString( - tf_summary.SerializeToString() - ) - event = event_pb2.Event(step=1, wall_time=123.456, summary=tb_summary) - run_proto = self._add_events_and_flush(_apply_compat([event])) - expected_run_proto = write_service_pb2.WriteScalarRequest.Run() - foo_tag = expected_run_proto.tags.add() - foo_tag.name = "foo/scalar_summary" - foo_tag.metadata.display_name = "foo" - foo_tag.metadata.plugin_data.plugin_name = "scalars" - foo_tag.metadata.data_class = summary_pb2.DATA_CLASS_SCALAR - foo_tag.points.add( - step=1, wall_time=test_util.timestamp_pb(123456000000), value=5.0 - ) - self.assertProtoEquals(run_proto, expected_run_proto) - - def test_v2_summary(self): - event = event_pb2.Event( - step=1, wall_time=123.456, summary=scalar_v2.scalar_pb("foo", 5.0) - ) - run_proto = self._add_events_and_flush(_apply_compat([event])) - expected_run_proto = write_service_pb2.WriteScalarRequest.Run() - foo_tag = expected_run_proto.tags.add() - foo_tag.name = "foo" - foo_tag.metadata.plugin_data.plugin_name = "scalars" - foo_tag.metadata.data_class = summary_pb2.DATA_CLASS_SCALAR - foo_tag.points.add( - step=1, wall_time=test_util.timestamp_pb(123456000000), value=5.0 - ) - self.assertProtoEquals(run_proto, expected_run_proto) - - def test_propagates_experiment_deletion(self): - event = event_pb2.Event(step=1) - event.summary.value.add(tag="foo", simple_value=1.0) - - mock_client = _create_mock_client() - sender = _create_scalar_request_sender("123", mock_client) - self._add_events(sender, "run", _apply_compat([event])) - - error = test_util.grpc_error(grpc.StatusCode.NOT_FOUND, "nope") - mock_client.WriteScalar.side_effect = error - with self.assertRaises(uploader_lib.ExperimentNotFoundError): - sender.flush() - - def test_no_budget_for_base_request(self): - mock_client = _create_mock_client() - long_experiment_id = "A" * 12 - with self.assertRaises(RuntimeError) as cm: - 
_create_scalar_request_sender( - experiment_id=long_experiment_id, - api=mock_client, - max_request_size=12, - ) - self.assertEqual( - str(cm.exception), "Byte budget too small for base request" - ) - - def test_no_room_for_single_point(self): - mock_client = _create_mock_client() - event = event_pb2.Event(step=1, wall_time=123.456) - event.summary.value.add(tag="foo", simple_value=1.0) - long_run_name = "A" * 12 - sender = _create_scalar_request_sender( - "123", mock_client, max_request_size=12 - ) - with self.assertRaises(RuntimeError) as cm: - self._add_events(sender, long_run_name, [event]) - self.assertEqual(str(cm.exception), "add_event failed despite flush") - - def test_break_at_run_boundary(self): - mock_client = _create_mock_client() - # Choose run name sizes such that one run fits in a 1024 byte request, - # but not two. - long_run_1 = "A" * 768 - long_run_2 = "B" * 768 - event_1 = event_pb2.Event(step=1) - event_1.summary.value.add(tag="foo", simple_value=1.0) - event_2 = event_pb2.Event(step=2) - event_2.summary.value.add(tag="bar", simple_value=-2.0) - - sender = _create_scalar_request_sender( - "123", - mock_client, - # Set a limit to request size - max_request_size=1024, - ) - self._add_events(sender, long_run_1, _apply_compat([event_1])) - self._add_events(sender, long_run_2, _apply_compat([event_2])) - sender.flush() - requests = [c[0][0] for c in mock_client.WriteScalar.call_args_list] - - for request in requests: - _clear_wall_times(request) - - # Expect two RPC calls despite a single explicit call to flush(). - expected = [ - write_service_pb2.WriteScalarRequest(experiment_id="123"), - write_service_pb2.WriteScalarRequest(experiment_id="123"), - ] - ( - expected[0] - .runs.add(name=long_run_1) - .tags.add(name="foo", metadata=test_util.scalar_metadata("foo")) - .points.add(step=1, value=1.0) - ) - ( - expected[1] - .runs.add(name=long_run_2) - .tags.add(name="bar", metadata=test_util.scalar_metadata("bar")) - .points.add(step=2, value=-2.0) - ) - self.assertEqual(requests, expected) - - def test_break_at_tag_boundary(self): - mock_client = _create_mock_client() - # Choose tag name sizes such that one tag fits in a 1024 byte requst, - # but not two. Note that tag names appear in both `Tag.name` and the - # summary metadata. - long_tag_1 = "a" * 384 - long_tag_2 = "b" * 384 - event = event_pb2.Event(step=1) - event.summary.value.add(tag=long_tag_1, simple_value=1.0) - event.summary.value.add(tag=long_tag_2, simple_value=2.0) - - sender = _create_scalar_request_sender( - "123", - mock_client, - # Set a limit to request size - max_request_size=1024, - ) - self._add_events(sender, "train", _apply_compat([event])) - sender.flush() - requests = [c[0][0] for c in mock_client.WriteScalar.call_args_list] - for request in requests: - _clear_wall_times(request) - - # Expect two RPC calls despite a single explicit call to flush(). 
- expected = [ - write_service_pb2.WriteScalarRequest(experiment_id="123"), - write_service_pb2.WriteScalarRequest(experiment_id="123"), - ] - ( - expected[0] - .runs.add(name="train") - .tags.add( - name=long_tag_1, metadata=test_util.scalar_metadata(long_tag_1) - ) - .points.add(step=1, value=1.0) - ) - ( - expected[1] - .runs.add(name="train") - .tags.add( - name=long_tag_2, metadata=test_util.scalar_metadata(long_tag_2) - ) - .points.add(step=1, value=2.0) - ) - self.assertEqual(requests, expected) - - def test_break_at_scalar_point_boundary(self): - mock_client = _create_mock_client() - point_count = 2000 # comfortably saturates a single 1024-byte request - events = [] - for step in range(point_count): - summary = scalar_v2.scalar_pb("loss", -2.0 * step) - if step > 0: - summary.value[0].ClearField("metadata") - events.append(event_pb2.Event(summary=summary, step=step)) - tracker = upload_tracker.UploadTracker(verbosity=0) - sender = _create_scalar_request_sender( - "123", - mock_client, - # Set a limit to request size - max_request_size=1024, - tracker=tracker, - ) - self._add_events(sender, "train", _apply_compat(events)) - sender.flush() - requests = [c[0][0] for c in mock_client.WriteScalar.call_args_list] - for request in requests: - _clear_wall_times(request) - - self.assertGreater(len(requests), 1) - self.assertLess(len(requests), point_count) - # This is the observed number of requests when running the test. There - # is no reasonable way to derive this value from just reading the code. - # The number of requests does not have to be 33 to be correct but if it - # changes it probably warrants some investigation or thought. - self.assertEqual(33, len(requests)) - - total_points_in_result = 0 - for request in requests: - self.assertLen(request.runs, 1) - run = request.runs[0] - self.assertEqual(run.name, "train") - self.assertLen(run.tags, 1) - tag = run.tags[0] - self.assertEqual(tag.name, "loss") - for point in tag.points: - self.assertEqual(point.step, total_points_in_result) - self.assertEqual(point.value, -2.0 * point.step) - total_points_in_result += 1 - self.assertLessEqual(request.ByteSize(), 1024) - self.assertEqual(total_points_in_result, point_count) - with self.subTest("Scalar report count correct."): - self.assertEqual(tracker._stats.num_scalars, point_count) - - def test_prunes_tags_and_runs(self): - mock_client = _create_mock_client() - event_1 = event_pb2.Event(step=1) - event_1.summary.value.add(tag="foo", simple_value=1.0) - event_2 = event_pb2.Event(step=2) - event_2.summary.value.add(tag="bar", simple_value=-2.0) - - add_point_call_count = 0 - - def mock_add_point(byte_budget_manager_self, point): - # Simulate out-of-space error the first time that we try to store - # the second point. 
- nonlocal add_point_call_count - add_point_call_count += 1 - if add_point_call_count == 2: - raise uploader_lib._OutOfSpaceError() - - with mock.patch.object( - uploader_lib._ByteBudgetManager, - "add_point", - mock_add_point, - ): - sender = _create_scalar_request_sender("123", mock_client) - self._add_events(sender, "train", _apply_compat([event_1])) - self._add_events(sender, "test", _apply_compat([event_2])) - sender.flush() - requests = [c[0][0] for c in mock_client.WriteScalar.call_args_list] - for request in requests: - _clear_wall_times(request) - - expected = [ - write_service_pb2.WriteScalarRequest(experiment_id="123"), - write_service_pb2.WriteScalarRequest(experiment_id="123"), - ] - ( - expected[0] - .runs.add(name="train") - .tags.add(name="foo", metadata=test_util.scalar_metadata("foo")) - .points.add(step=1, value=1.0) - ) - ( - expected[1] - .runs.add(name="test") - .tags.add(name="bar", metadata=test_util.scalar_metadata("bar")) - .points.add(step=2, value=-2.0) - ) - self.assertEqual(expected, requests) - - def test_wall_time_precision(self): - # Test a wall time that is exactly representable in float64 but has enough - # digits to incur error if converted to nanoseconds the naive way (* 1e9). - event1 = event_pb2.Event(step=1, wall_time=1567808404.765432119) - event1.summary.value.add(tag="foo", simple_value=1.0) - # Test a wall time where as a float64, the fractional part on its own will - # introduce error if truncated to 9 decimal places instead of rounded. - event2 = event_pb2.Event(step=2, wall_time=1.000000002) - event2.summary.value.add(tag="foo", simple_value=2.0) - run_proto = self._add_events_and_flush(_apply_compat([event1, event2])) - self.assertEqual( - test_util.timestamp_pb(1567808404765432119), - run_proto.tags[0].points[0].wall_time, - ) - self.assertEqual( - test_util.timestamp_pb(1000000002), - run_proto.tags[0].points[1].wall_time, - ) - - -class TensorBatchedRequestSenderTest(tf.test.TestCase): - def _add_events(self, sender, run_name, events): - for event in events: - for value in event.summary.value: - sender.add_event(run_name, event, value, value.metadata) - - def _add_events_and_flush(self, events, max_tensor_point_size=_USE_DEFAULT): - mock_client = _create_mock_client() - sender = _create_tensor_request_sender( - experiment_id="123", - api=mock_client, - max_tensor_point_size=max_tensor_point_size, - ) - self._add_events(sender, "", events) - sender.flush() - - requests = [c[0][0] for c in mock_client.WriteTensor.call_args_list] - self.assertLen(requests, 1) - self.assertLen(requests[0].runs, 1) - return requests[0].runs[0] - - def test_histogram_event(self): - event = event_pb2.Event( - step=1, - wall_time=123.456, - summary=histogram_v2.histogram_pb("foo", [1.0]), - ) - - run_proto = self._add_events_and_flush(_apply_compat([event])) - expected_run_proto = write_service_pb2.WriteTensorRequest.Run() - foo_tag = expected_run_proto.tags.add() - foo_tag.name = "foo" - foo_tag.metadata.plugin_data.plugin_name = "histograms" - foo_tag.metadata.data_class = summary_pb2.DATA_CLASS_TENSOR - foo_tag.points.add( - step=1, - wall_time=test_util.timestamp_pb(123456000000), - value=tensor_pb2.TensorProto(dtype=types_pb2.DT_DOUBLE), - ) - # Simplify the tensor value a bit before making assertions on it. - # We care that it is copied to the request but we don't need it to be - # an extensive test. 
- run_proto.tags[0].points[0].value.ClearField("tensor_shape") - run_proto.tags[0].points[0].value.ClearField("tensor_content") - self.assertProtoEquals(run_proto, expected_run_proto) - - def test_histogram_event_with_empty_tensor_content_errors_out(self): - event = event_pb2.Event(step=42) - event.summary.value.add( - tag="one", - tensor=tensor_pb2.TensorProto( - dtype=types_pb2.DT_DOUBLE, - # Use empty tensor content to elicit an error. - tensor_content=b"", - ), - ) - - mock_client = _create_mock_client() - sender = _create_tensor_request_sender("123", mock_client) - with self.assertRaisesRegex( - ValueError, - re.compile( - r"failed to upload a tensor.*malformation.*tag.*\'one\'.*step.*42", - re.DOTALL, - ), - ): - self._add_events(sender, "run", _apply_compat([event])) - - def test_histogram_event_with_incorrect_tensor_shape_errors_out(self): - event = event_pb2.Event(step=1337) - tensor_proto = tensor_util.make_tensor_proto([1.0, 2.0]) - # Add an extraneous dimension to the tensor shape in order to - # elicit an error. - tensor_proto.tensor_shape.dim.append( - tensor_shape_pb2.TensorShapeProto.Dim(size=2) - ) - event.summary.value.add(tag="two", tensor=tensor_proto) - - mock_client = _create_mock_client() - sender = _create_tensor_request_sender("123", mock_client) - with self.assertRaisesRegex( - ValueError, - re.compile( - r"failed to upload a tensor.*malformation.*tag.*\'two\'.*step.*1337." - r"*shape", - re.DOTALL, - ), - ): - self._add_events(sender, "run", _apply_compat([event])) - - def test_aggregation_by_tag(self): - def make_event(step, wall_time, tag): - event = event_pb2.Event(step=step, wall_time=wall_time) - event.summary.value.add( - tag=tag, - tensor=tensor_pb2.TensorProto( - dtype=types_pb2.DT_DOUBLE, double_val=[1.0] - ), - ) - return event - - events = [ - make_event(1, 1.0, "one"), - make_event(1, 2.0, "two"), - make_event(2, 3.0, "one"), - make_event(2, 4.0, "two"), - make_event(1, 5.0, "one"), # Should preserve duplicate step=1. 
- make_event(1, 6.0, "three"), - ] - run_proto = self._add_events_and_flush(events) - tag_data = { - tag.name: [(p.step, p.wall_time.ToSeconds()) for p in tag.points] - for tag in run_proto.tags - } - self.assertEqual( - tag_data, - { - "one": [(1, 1.0), (2, 3.0), (1, 5.0)], - "two": [(1, 2.0), (2, 4.0)], - "three": [(1, 6.0)], - }, - ) - - def test_propagates_experiment_deletion(self): - event = event_pb2.Event(step=1) - event.summary.value.add( - tag="one", - tensor=tensor_pb2.TensorProto( - dtype=types_pb2.DT_DOUBLE, double_val=[1.0] - ), - ) - - mock_client = _create_mock_client() - sender = _create_tensor_request_sender("123", mock_client) - self._add_events(sender, "run", _apply_compat([event])) - - error = test_util.grpc_error(grpc.StatusCode.NOT_FOUND, "nope") - mock_client.WriteTensor.side_effect = error - with self.assertRaises(uploader_lib.ExperimentNotFoundError): - sender.flush() - - def test_no_budget_for_base_request(self): - mock_client = _create_mock_client() - long_experiment_id = "A" * 12 - with self.assertRaises(RuntimeError) as cm: - _create_tensor_request_sender( - experiment_id=long_experiment_id, - api=mock_client, - max_request_size=12, - ) - self.assertEqual( - str(cm.exception), "Byte budget too small for base request" - ) - - def test_no_room_for_single_point(self): - mock_client = _create_mock_client() - event = event_pb2.Event(step=1) - event.summary.value.add( - tag="one", - tensor=tensor_pb2.TensorProto( - dtype=types_pb2.DT_DOUBLE, double_val=[1.0] - ), - ) - long_run_name = "A" * 12 - sender = _create_tensor_request_sender( - "123", mock_client, max_request_size=12 - ) - with self.assertRaises(RuntimeError) as cm: - self._add_events(sender, long_run_name, [event]) - self.assertEqual(str(cm.exception), "add_event failed despite flush") - - def test_break_at_run_boundary(self): - mock_client = _create_mock_client() - # Choose run name sizes such that one run fits in a 1024 byte request, - # but not two. - long_run_1 = "A" * 768 - long_run_2 = "B" * 768 - event_1 = event_pb2.Event(step=1) - event_1.summary.value.add( - tag="one", - tensor=tensor_pb2.TensorProto( - dtype=types_pb2.DT_DOUBLE, double_val=[1.0] - ), - ) - event_2 = event_pb2.Event(step=2) - event_2.summary.value.add( - tag="two", - tensor=tensor_pb2.TensorProto( - dtype=types_pb2.DT_DOUBLE, double_val=[2.0] - ), - ) - - sender = _create_tensor_request_sender( - "123", - mock_client, - # Set a limit to request size - max_request_size=1024, - ) - self._add_events(sender, long_run_1, _apply_compat([event_1])) - self._add_events(sender, long_run_2, _apply_compat([event_2])) - sender.flush() - requests = [c[0][0] for c in mock_client.WriteTensor.call_args_list] - - # Expect two RPC calls despite a single explicit call to flush(). - self.assertEqual(2, len(requests)) - self.assertEqual(1, len(requests[0].runs)) - self.assertEqual(long_run_1, requests[0].runs[0].name) - self.assertEqual(1, len(requests[1].runs)) - self.assertEqual(long_run_2, requests[1].runs[0].name) - - def test_break_at_tag_boundary(self): - mock_client = _create_mock_client() - # Choose tag name sizes such that one tag fits in a 1024 byte request, - # but not two. 
- long_tag_1 = "a" * 600 - long_tag_2 = "b" * 600 - event = event_pb2.Event(step=1, wall_time=1) - event.summary.value.add( - tag=long_tag_1, - tensor=tensor_pb2.TensorProto( - dtype=types_pb2.DT_DOUBLE, double_val=[1.0] - ), - ) - event.summary.value.add( - tag=long_tag_2, - tensor=tensor_pb2.TensorProto( - dtype=types_pb2.DT_DOUBLE, double_val=[2.0] - ), - ) - - sender = _create_tensor_request_sender( - "123", - mock_client, - # Set a limit to request size - max_request_size=1024, - ) - self._add_events(sender, "train", _apply_compat([event])) - sender.flush() - requests = [c[0][0] for c in mock_client.WriteTensor.call_args_list] - - # Expect two RPC calls despite a single explicit call to flush(). - self.assertEqual(2, len(requests)) - # First RPC contains one tag. - self.assertEqual(1, len(requests[0].runs)) - self.assertEqual("train", requests[0].runs[0].name) - self.assertEqual(1, len(requests[0].runs[0].tags)) - self.assertEqual(long_tag_1, requests[0].runs[0].tags[0].name) - # Second RPC contains the other tag. - self.assertEqual(1, len(requests[1].runs)) - self.assertEqual("train", requests[1].runs[0].name) - self.assertEqual(1, len(requests[1].runs[0].tags)) - self.assertEqual(long_tag_2, requests[1].runs[0].tags[0].name) - - def test_break_at_tensor_point_boundary(self): - mock_client = _create_mock_client() - point_count = 2000 # comfortably saturates a single 1024-byte request - events = [] - for step in range(point_count): - event = event_pb2.Event(step=step) - tensor_proto = tensor_pb2.TensorProto( - dtype=types_pb2.DT_DOUBLE, double_val=[1.0 * step, -1.0 * step] - ) - tensor_proto.tensor_shape.dim.append( - tensor_shape_pb2.TensorShapeProto.Dim(size=2) - ) - event.summary.value.add(tag="histo", tensor=tensor_proto) - events.append(event) - - tracker = upload_tracker.UploadTracker(verbosity=0) - sender = _create_tensor_request_sender( - "123", - mock_client, - # Set a limit to request size - max_request_size=1024, - tracker=tracker, - ) - self._add_events(sender, "train", _apply_compat(events)) - sender.flush() - requests = [c[0][0] for c in mock_client.WriteTensor.call_args_list] - - self.assertGreater(len(requests), 1) - self.assertLess(len(requests), point_count) - self.assertEqual(72, len(requests)) - - total_points_in_result = 0 - for request in requests: - self.assertLen(request.runs, 1) - run = request.runs[0] - self.assertEqual(run.name, "train") - self.assertLen(run.tags, 1) - tag = run.tags[0] - self.assertEqual(tag.name, "histo") - for point in tag.points: - self.assertEqual(point.step, total_points_in_result) - self.assertEqual( - point.value.double_val, - [1.0 * point.step, -1.0 * point.step], - ) - total_points_in_result += 1 - self.assertLessEqual(request.ByteSize(), 1024) - self.assertEqual(total_points_in_result, point_count) - with self.subTest("Tensor report count correct."): - self.assertEqual(tracker._stats.num_tensors, point_count) - - def test_strip_large_tensors(self): - # Generate test data with varying tensor point sizes. Use raw bytes. - event_1 = event_pb2.Event(step=1) - event_1.summary.value.add( - tag="one", - # This TensorProto has a byte size of 18. - tensor=tensor_util.make_tensor_proto([1.0, 2.0]), - ) - event_1.summary.value.add( - tag="two", - # This TensorProto has a byte size of 22. - tensor=tensor_util.make_tensor_proto([1.0, 2.0, 3.0]), - ) - # This TensorProto has a 12-byte tensor_content. - event_2 = event_pb2.Event(step=2) - event_2.summary.value.add( - tag="one", - # This TensorProto has a byte size of 18. 
- tensor=tensor_util.make_tensor_proto([2.0, 4.0]), - ) - event_2.summary.value.add( - tag="two", - # This TensorProto has a byte size of 26. - tensor=tensor_util.make_tensor_proto([1.0, 2.0, 3.0, 4.0]), - ) - - run_proto = self._add_events_and_flush( - _apply_compat([event_1, event_2]), - # Set threshold that will filter out the tensor point with 26 bytes - # of data and above. The additional byte is for proto overhead. - max_tensor_point_size=24, - ) - tag_data = { - tag.name: [(p.step, p.value.tensor_content) for p in tag.points] - for tag in run_proto.tags - } - # A single tensor point is filtered out. - self.assertEqual( - tag_data, - { - "one": [ - (1, b"\x00\x00\x80?\x00\x00\x00@"), - (2, b"\x00\x00\x00@\x00\x00\x80@"), - ], - "two": [(1, b"\x00\x00\x80?\x00\x00\x00@\x00\x00@@")], - }, - ) - - run_proto_2 = self._add_events_and_flush( - _apply_compat([event_1, event_2]), - # Set threshold that will filter out the tensor points with 22 and 26 - # bytes of data and above. The additional byte is for proto overhead. - max_tensor_point_size=20, - ) - tag_data_2 = { - tag.name: [(p.step, p.value.tensor_content) for p in tag.points] - for tag in run_proto_2.tags - } - # All tensor points from the same tag are filtered out, and the tag is pruned. - self.assertEqual( - tag_data_2, - { - "one": [ - (1, b"\x00\x00\x80?\x00\x00\x00@"), - (2, b"\x00\x00\x00@\x00\x00\x80@"), - ], - }, - ) - - def test_prunes_tags_and_runs(self): - mock_client = _create_mock_client() - event_1 = event_pb2.Event(step=1) - event_1.summary.value.add( - tag="one", - tensor=tensor_pb2.TensorProto( - dtype=types_pb2.DT_DOUBLE, double_val=[1.0] - ), - ) - event_2 = event_pb2.Event(step=2) - event_2.summary.value.add( - tag="two", - tensor=tensor_pb2.TensorProto( - dtype=types_pb2.DT_DOUBLE, double_val=[2.0] - ), - ) - - add_point_call_count = 0 - - def mock_add_point(byte_budget_manager_self, point): - # Simulate out-of-space error the first time that we try to store - # the second point. - nonlocal add_point_call_count - add_point_call_count += 1 - if add_point_call_count == 2: - raise uploader_lib._OutOfSpaceError() - - with mock.patch.object( - uploader_lib._ByteBudgetManager, - "add_point", - mock_add_point, - ): - sender = _create_tensor_request_sender("123", mock_client) - self._add_events(sender, "train", _apply_compat([event_1])) - self._add_events(sender, "test", _apply_compat([event_2])) - sender.flush() - requests = [c[0][0] for c in mock_client.WriteTensor.call_args_list] - - # Expect two RPC calls despite a single explicit call to flush(). - self.assertEqual(2, len(requests)) - # First RPC contains one tag. - self.assertEqual(1, len(requests[0].runs)) - self.assertEqual("train", requests[0].runs[0].name) - self.assertEqual(1, len(requests[0].runs[0].tags)) - self.assertEqual("one", requests[0].runs[0].tags[0].name) - # Second RPC contains the other tag. - self.assertEqual(1, len(requests[1].runs)) - self.assertEqual("test", requests[1].runs[0].name) - self.assertEqual(1, len(requests[1].runs[0].tags)) - self.assertEqual("two", requests[1].runs[0].tags[0].name) - - def test_wall_time_precision(self): - # Test a wall time that is exactly representable in float64 but has enough - # digits to incur error if converted to nanoseconds the naive way (* 1e9). 
- event_1 = event_pb2.Event(step=1, wall_time=1567808404.765432119) - event_1.summary.value.add( - tag="tag", - tensor=tensor_pb2.TensorProto( - dtype=types_pb2.DT_DOUBLE, double_val=[1.0] - ), - ) - # Test a wall time where as a float64, the fractional part on its own will - # introduce error if truncated to 9 decimal places instead of rounded. - event_2 = event_pb2.Event(step=2, wall_time=1.000000002) - event_2.summary.value.add( - tag="tag", - tensor=tensor_pb2.TensorProto( - dtype=types_pb2.DT_DOUBLE, double_val=[2.0] - ), - ) - run_proto = self._add_events_and_flush( - _apply_compat([event_1, event_2]) - ) - self.assertEqual( - test_util.timestamp_pb(1567808404765432119), - run_proto.tags[0].points[0].wall_time, - ) - self.assertEqual( - test_util.timestamp_pb(1000000002), - run_proto.tags[0].points[1].wall_time, - ) - - class DeleteExperimentTest(tf.test.TestCase): def _create_mock_client(self): # Create a stub instance (using a test channel) in order to derive a mock @@ -1992,39 +175,5 @@ def test_internal_error(self): self.assertIn("travesty", msg) -class VarintCostTest(tf.test.TestCase): - def test_varint_cost(self): - self.assertEqual(uploader_lib._varint_cost(0), 1) - self.assertEqual(uploader_lib._varint_cost(7), 1) - self.assertEqual(uploader_lib._varint_cost(127), 1) - self.assertEqual(uploader_lib._varint_cost(128), 2) - self.assertEqual(uploader_lib._varint_cost(128 * 128 - 1), 2) - self.assertEqual(uploader_lib._varint_cost(128 * 128), 3) - - -def _clear_wall_times(request): - """Clears the wall_time fields in a WriteScalarRequest to be - deterministic.""" - for run in request.runs: - for tag in run.tags: - for point in tag.points: - point.ClearField("wall_time") - - -def _apply_compat(events): - initial_metadata = {} - for event in events: - event = data_compat.migrate_event(event) - events = dataclass_compat.migrate_event( - event, initial_metadata=initial_metadata - ) - for event in events: - yield event - - -def _extract_tag_counts(run_proto): - return {tag.name: len(tag.points) for tag in run_proto.tags} - - if __name__ == "__main__": tf.test.main() diff --git a/tensorboard/uploader/util.py b/tensorboard/uploader/util.py index f8917e86f6..3b90fdbbae 100644 --- a/tensorboard/uploader/util.py +++ b/tensorboard/uploader/util.py @@ -19,28 +19,6 @@ import errno import os import os.path -import time - - -class RateLimiter: - """Helper class for rate-limiting using a fixed minimum interval.""" - - def __init__(self, interval_secs): - """Constructs a RateLimiter that permits a tick() every - `interval_secs`.""" - self._time = time # Use property for ease of testing. 
- self._interval_secs = interval_secs - self._last_called_secs = 0 - - def tick(self): - """Blocks until it has been at least `interval_secs` since last - tick().""" - wait_secs = ( - self._last_called_secs + self._interval_secs - self._time.time() - ) - if wait_secs > 0: - self._time.sleep(wait_secs) - self._last_called_secs = self._time.time() def get_user_config_directory(): diff --git a/tensorboard/uploader/util_test.py b/tensorboard/uploader/util_test.py index 49d12694d5..09cc7101eb 100644 --- a/tensorboard/uploader/util_test.py +++ b/tensorboard/uploader/util_test.py @@ -22,35 +22,10 @@ from unittest import mock from google.protobuf import timestamp_pb2 -from tensorboard.uploader import test_util from tensorboard.uploader import util from tensorboard import test as tb_test -class RateLimiterTest(tb_test.TestCase): - def test_rate_limiting(self): - rate_limiter = util.RateLimiter(10) - fake_time = test_util.FakeTime(current=1000) - with mock.patch.object(rate_limiter, "_time", fake_time): - self.assertEqual(1000, fake_time.time()) - # No sleeping for initial tick. - rate_limiter.tick() - self.assertEqual(1000, fake_time.time()) - # Second tick requires a full sleep. - rate_limiter.tick() - self.assertEqual(1010, fake_time.time()) - # Third tick requires a sleep just to make up the remaining second. - fake_time.sleep(9) - self.assertEqual(1019, fake_time.time()) - rate_limiter.tick() - self.assertEqual(1020, fake_time.time()) - # Fourth tick requires no sleep since we have no remaining seconds. - fake_time.sleep(11) - self.assertEqual(1031, fake_time.time()) - rate_limiter.tick() - self.assertEqual(1031, fake_time.time()) - - class GetUserConfigDirectoryTest(tb_test.TestCase): def test_windows(self): with mock.patch.object(os, "name", "nt"): diff --git a/tensorboard/version.py b/tensorboard/version.py index 2630468e2f..87847ea5d7 100644 --- a/tensorboard/version.py +++ b/tensorboard/version.py @@ -15,7 +15,7 @@ """Contains the version string.""" -VERSION = "2.15.0" +VERSION = "2.15.1" if __name__ == "__main__": print(VERSION) diff --git a/tensorboard/webapp/metrics/views/main_view/card_grid_component.scss b/tensorboard/webapp/metrics/views/main_view/card_grid_component.scss index c78c16ba04..c12d11cf3b 100644 --- a/tensorboard/webapp/metrics/views/main_view/card_grid_component.scss +++ b/tensorboard/webapp/metrics/views/main_view/card_grid_component.scss @@ -92,10 +92,8 @@ card-view { } .pagination-button { - @include tb-theme-foreground-prop(color, secondary-text); background-color: $metrics-button-background-color-on-gray; - - &:disabled { - @include tb-theme-foreground-prop(color, disabled-text); + @include tb-dark-theme { + background-color: transparent; } } diff --git a/tensorboard/webapp/runs/views/runs_table/BUILD b/tensorboard/webapp/runs/views/runs_table/BUILD index 4d936d8af1..6ccc536e0b 100644 --- a/tensorboard/webapp/runs/views/runs_table/BUILD +++ b/tensorboard/webapp/runs/views/runs_table/BUILD @@ -64,6 +64,7 @@ tf_ng_module( "runs_table_component.ts", "runs_table_container.ts", "runs_table_module.ts", + "sorting_utils.ts", ], assets = [ ":regex_edit_dialog_styles", @@ -131,6 +132,7 @@ tf_ts_library( "regex_edit_dialog_test.ts", "runs_data_table_test.ts", "runs_table_test.ts", + "sorting_utils_test.ts", ], deps = [ ":runs_table", diff --git a/tensorboard/webapp/runs/views/runs_table/runs_data_table.ng.html b/tensorboard/webapp/runs/views/runs_table/runs_data_table.ng.html index 0097393986..4848283c26 100644 --- 
a/tensorboard/webapp/runs/views/runs_table/runs_data_table.ng.html
+++ b/tensorboard/webapp/runs/views/runs_table/runs_data_table.ng.html
@@ -23,17 +23,14 @@
     placeholder="Filter runs (regex)"
   >
-
-
-
{ - let aValue = a[sort.name]; - let bValue = b[sort.name]; - - if (sort.name === 'experimentAlias') { - aValue = (aValue as ExperimentAlias).aliasNumber; - bValue = (bValue as ExperimentAlias).aliasNumber; - } - - if (aValue === bValue) { - return 0; - } - - if (aValue === undefined || bValue === undefined) { - return bValue === undefined ? -1 : 1; - } - - return aValue < bValue === (sort.order === SortingOrder.ASCENDING) ? -1 : 1; - }); - return sortedItems; -} - function matchFilter( filter: DiscreteFilter | IntervalFilter, value: number | DiscreteHparamValue | undefined diff --git a/tensorboard/webapp/runs/views/runs_table/sorting_utils.ts b/tensorboard/webapp/runs/views/runs_table/sorting_utils.ts new file mode 100644 index 0000000000..cf67fbaf5f --- /dev/null +++ b/tensorboard/webapp/runs/views/runs_table/sorting_utils.ts @@ -0,0 +1,131 @@ +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +import { + SortingInfo, + SortingOrder, + TableData, +} from '../../../widgets/data_table/types'; +import {ExperimentAlias} from '../../../experiments/types'; + +enum UndefinedStrategy { + BEFORE, + AFTER, +} + +interface SortOptions { + insertUndefined: UndefinedStrategy; +} + +const POTENTIALLY_NUMERIC_TYPES = new Set(['string', 'number']); + +const DEFAULT_SORT_OPTIONS: SortOptions = { + insertUndefined: UndefinedStrategy.AFTER, +}; + +export function parseNumericPrefix(value: string | number) { + if (typeof value === 'number') { + return isNaN(value) ? 
undefined : value;
+  }
+
+  if (!isNaN(parseInt(value))) {
+    return parseInt(value);
+  }
+
+  for (let i = 0; i < value.length; i++) {
+    if (isNaN(parseInt(value[i]))) {
+      if (i === 0) return;
+      return parseInt(value.slice(0, i));
+    }
+  }
+
+  return;
+}
+
+export function sortTableDataItems(
+  items: TableData[],
+  sort: SortingInfo
+): TableData[] {
+  const sortedItems = [...items];
+
+  sortedItems.sort((a, b) => {
+    let aValue = a[sort.name];
+    let bValue = b[sort.name];
+
+    if (sort.name === 'experimentAlias') {
+      aValue = (aValue as ExperimentAlias).aliasNumber;
+      bValue = (bValue as ExperimentAlias).aliasNumber;
+    }
+
+    if (aValue === bValue) {
+      return 0;
+    }
+
+    if (aValue === undefined || bValue === undefined) {
+      return compareValues(aValue, bValue);
+    }
+
+    if (
+      POTENTIALLY_NUMERIC_TYPES.has(typeof aValue) &&
+      POTENTIALLY_NUMERIC_TYPES.has(typeof bValue)
+    ) {
+      const aPrefix = parseNumericPrefix(aValue as string | number);
+      const bPrefix = parseNumericPrefix(bValue as string | number);
+      // Runs without a numeric prefix sort before runs that have one.
+      if (
+        (aPrefix === undefined || bPrefix === undefined) &&
+        aPrefix !== bPrefix
+      ) {
+        return compareValues(aPrefix, bPrefix, {
+          insertUndefined: UndefinedStrategy.BEFORE,
+        });
+      }
+      if (aPrefix !== undefined && bPrefix !== undefined) {
+        if (aPrefix === bPrefix) {
+          // Equal numeric prefixes: fall back to comparing the remainders
+          // of the two values.
+          const aPostfix =
+            aValue.toString().slice(aPrefix.toString().length) || undefined;
+          const bPostfix =
+            bValue.toString().slice(bPrefix.toString().length) || undefined;
+          return compareValues(aPostfix, bPostfix, {
+            insertUndefined: UndefinedStrategy.BEFORE,
+          });
+        }
+
+        return compareValues(aPrefix, bPrefix);
+      }
+    }
+
+    return compareValues(aValue, bValue);
+  });
+  return sortedItems;
+
+  function compareValues(
+    a: TableData[string] | undefined,
+    b: TableData[string] | undefined,
+    {insertUndefined}: SortOptions = DEFAULT_SORT_OPTIONS
+  ) {
+    if (a === b) {
+      return 0;
+    }
+
+    if (a === undefined) {
+      return insertUndefined === UndefinedStrategy.AFTER ? 1 : -1;
+    }
+    if (b === undefined) {
+      return insertUndefined === UndefinedStrategy.AFTER ? -1 : 1;
+    }
+
+    return a < b === (sort.order === SortingOrder.ASCENDING) ? -1 : 1;
+  }
+}
diff --git a/tensorboard/webapp/runs/views/runs_table/sorting_utils_test.ts b/tensorboard/webapp/runs/views/runs_table/sorting_utils_test.ts
new file mode 100644
index 0000000000..25348ec939
--- /dev/null
+++ b/tensorboard/webapp/runs/views/runs_table/sorting_utils_test.ts
@@ -0,0 +1,286 @@
+/* Copyright 2023 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/ +import {SortingOrder} from '../../../widgets/data_table/types'; +import {parseNumericPrefix, sortTableDataItems} from './sorting_utils'; + +describe('sorting utils', () => { + describe('parseNumericPrefix', () => { + it('returns undefined when a non numeric value is provided', () => { + expect(parseNumericPrefix('')).toBeUndefined(); + expect(parseNumericPrefix('foo')).toBeUndefined(); + expect(parseNumericPrefix('foo123')).toBeUndefined(); + expect(parseNumericPrefix(NaN)).toBeUndefined(); + }); + + it('returns all leading numbers from a string', () => { + expect(parseNumericPrefix('0')).toEqual(0); + expect(parseNumericPrefix('123')).toEqual(123); + expect(parseNumericPrefix('123train')).toEqual(123); + expect(parseNumericPrefix('123/')).toEqual(123); + expect(parseNumericPrefix('123/foo')).toEqual(123); + expect(parseNumericPrefix('123/foo/456')).toEqual(123); + }); + + it('returns numbers when provided', () => { + expect(parseNumericPrefix(123)).toEqual(123); + }); + }); + + describe('sortTableDataItems', () => { + it('sorts experimentAlias by alias number', () => { + expect( + sortTableDataItems( + [ + { + id: 'row 1 id', + experimentAlias: { + aliasNumber: 5, + }, + }, + { + id: 'row 2 id', + experimentAlias: { + aliasNumber: 3, + }, + }, + ], + { + order: SortingOrder.ASCENDING, + name: 'experimentAlias', + } + ) + ).toEqual([ + { + id: 'row 2 id', + experimentAlias: { + aliasNumber: 3, + }, + }, + { + id: 'row 1 id', + experimentAlias: { + aliasNumber: 5, + }, + }, + ]); + }); + + it('sorts runs by their leading numbers', () => { + expect( + sortTableDataItems( + [ + { + id: 'row 1 id', + name: '1/myrun', + }, + { + id: 'row 2 id', + name: '2/myrun', + }, + { + id: 'row 3 id', + name: '10/myrun', + }, + ], + { + order: SortingOrder.ASCENDING, + name: 'name', + } + ) + ).toEqual([ + { + id: 'row 1 id', + name: '1/myrun', + }, + { + id: 'row 2 id', + name: '2/myrun', + }, + { + id: 'row 3 id', + name: '10/myrun', + }, + ]); + }); + + it('sorts runs with purely numeric run names before runs with leading numbers', () => { + expect( + sortTableDataItems( + [ + { + id: 'row 1 id', + name: '0', + }, + { + id: 'row 2 id', + name: '0/myrun2', + }, + { + id: 'row 3 id', + name: '0/myrun1', + }, + ], + { + order: SortingOrder.ASCENDING, + name: 'name', + } + ) + ).toEqual([ + { + id: 'row 1 id', + name: '0', + }, + { + id: 'row 3 id', + name: '0/myrun1', + }, + { + id: 'row 2 id', + name: '0/myrun2', + }, + ]); + }); + + it('sorts runs with string names', () => { + expect( + sortTableDataItems( + [ + { + id: 'row 1 id', + name: 'aaa', + }, + { + id: 'row 2 id', + name: 'bbb', + }, + { + id: 'row 3 id', + name: 'ccc', + }, + ], + { + order: SortingOrder.ASCENDING, + name: 'name', + } + ) + ).toEqual([ + { + id: 'row 1 id', + name: 'aaa', + }, + { + id: 'row 2 id', + name: 'bbb', + }, + { + id: 'row 3 id', + name: 'ccc', + }, + ]); + }); + + it('shows runs without numbers before runs with numbers', () => { + expect( + sortTableDataItems( + [ + { + id: 'row 1 id', + name: 'aaa', + }, + { + id: 'row 2 id', + name: '1aaa', + }, + { + id: 'row 3 id', + name: '2bbb', + }, + ], + { + order: SortingOrder.ASCENDING, + name: 'name', + } + ) + ).toEqual([ + { + id: 'row 1 id', + name: 'aaa', + }, + { + id: 'row 2 id', + name: '1aaa', + }, + { + id: 'row 3 id', + name: '2bbb', + }, + ]); + }); + + it('places undefined values at the end', () => { + const input: any = [ + { + id: 'row 1 id', + foo: '1/myrun', + }, + { + id: 
'row 2 id', + }, + { + id: 'row 3 id', + foo: '10/myrun', + }, + ]; + + expect( + sortTableDataItems(input, { + order: SortingOrder.ASCENDING, + name: 'foo', + }) + ).toEqual([ + { + id: 'row 1 id', + foo: '1/myrun', + }, + { + id: 'row 3 id', + foo: '10/myrun', + }, + { + id: 'row 2 id', + }, + ]); + + expect( + sortTableDataItems(input, { + order: SortingOrder.DESCENDING, + name: 'foo', + }) + ).toEqual([ + { + id: 'row 3 id', + foo: '10/myrun', + }, + { + id: 'row 1 id', + foo: '1/myrun', + }, + { + id: 'row 2 id', + }, + ]); + }); + }); +}); diff --git a/tensorboard/webapp/widgets/data_table/data_table_component.ng.html b/tensorboard/webapp/widgets/data_table/data_table_component.ng.html index e7872fdc16..e7e4357aee 100644 --- a/tensorboard/webapp/widgets/data_table/data_table_component.ng.html +++ b/tensorboard/webapp/widgets/data_table/data_table_component.ng.html @@ -113,3 +113,6 @@
+<div class="loading" *ngIf="loading">
+  <mat-spinner></mat-spinner>
+</div>
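For reference, a minimal sketch of how a host template might drive the new `loading` input (not part of this diff): the `<tb-data-table>` selector and the `isLoading` flag are assumed names for illustration, while `[sortingInfo]`, `[columnFilters]`, and `[loading]` match the inputs exercised in data_table_test.ts below.

<!-- Hypothetical host template: while `isLoading` is true, the table renders
     the `.loading` spinner row added above instead of an empty body. -->
<tb-data-table
  [sortingInfo]="sortingInfo"
  [columnFilters]="columnFilters"
  [loading]="isLoading"
></tb-data-table>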
diff --git a/tensorboard/webapp/widgets/data_table/data_table_component.scss b/tensorboard/webapp/widgets/data_table/data_table_component.scss index 02a53f75fa..125e8781a4 100644 --- a/tensorboard/webapp/widgets/data_table/data_table_component.scss +++ b/tensorboard/webapp/widgets/data_table/data_table_component.scss @@ -45,6 +45,16 @@ $_accent: map-get(mat.get-color-config($tb-theme), accent); } } +.loading { + align-items: center; + border: 0; + @include tb-theme-foreground-prop(border-bottom, border, 1px solid); + display: flex; + height: 48px; + padding: 0 24px; + justify-content: center; +} + .add-button-cell { display: table-cell; width: 40px; diff --git a/tensorboard/webapp/widgets/data_table/data_table_component.ts b/tensorboard/webapp/widgets/data_table/data_table_component.ts index 336d1e676d..95993ae8cd 100644 --- a/tensorboard/webapp/widgets/data_table/data_table_component.ts +++ b/tensorboard/webapp/widgets/data_table/data_table_component.ts @@ -65,6 +65,7 @@ export class DataTableComponent implements OnDestroy, AfterContentInit { @Input() columnCustomizationEnabled!: boolean; @Input() selectableColumns?: ColumnHeader[]; @Input() columnFilters!: Map; + @Input() loading: boolean = false; @ContentChildren(HeaderCellComponent) headerCells!: QueryList; diff --git a/tensorboard/webapp/widgets/data_table/data_table_module.ts b/tensorboard/webapp/widgets/data_table/data_table_module.ts index 62ced1d9f5..2220aef24d 100644 --- a/tensorboard/webapp/widgets/data_table/data_table_module.ts +++ b/tensorboard/webapp/widgets/data_table/data_table_module.ts @@ -17,6 +17,7 @@ import {CommonModule} from '@angular/common'; import {NgModule} from '@angular/core'; import {MatIconModule} from '@angular/material/icon'; import {MatButtonModule} from '@angular/material/button'; +import {MatProgressSpinnerModule} from '@angular/material/progress-spinner'; import {DataTableComponent} from './data_table_component'; import {HeaderCellComponent} from './header_cell_component'; import {DataTableHeaderModule} from './data_table_header_module'; @@ -43,6 +44,7 @@ import {FilterDialogModule} from './filter_dialog_module'; CommonModule, MatIconModule, MatButtonModule, + MatProgressSpinnerModule, DataTableHeaderModule, CustomModalModule, ColumnSelectorModule, diff --git a/tensorboard/webapp/widgets/data_table/data_table_test.ts b/tensorboard/webapp/widgets/data_table/data_table_test.ts index ec9a2ab96c..3a575be9c1 100644 --- a/tensorboard/webapp/widgets/data_table/data_table_test.ts +++ b/tensorboard/webapp/widgets/data_table/data_table_test.ts @@ -46,6 +46,7 @@ import {FilterDialog} from './filter_dialog_component'; [sortingInfo]="sortingInfo" [selectableColumns]="selectableColumns" [columnFilters]="columnFilters" + [loading]="loading" (sortDataBy)="sortDataBy($event)" (orderColumns)="orderColumns($event)" (addColumn)="addColumn.emit($event)" @@ -88,6 +89,7 @@ class TestableComponent { @Input() orderColumns!: (newOrder: ColumnHeaderType[]) => void; @Input() selectableColumns!: ColumnHeader[]; @Input() columnFilters!: Map; + @Input() loading!: boolean; @Output() addColumn = new EventEmitter<{ header: ColumnHeader; @@ -123,6 +125,7 @@ describe('data table', () => { data?: TableData[]; potentialColumns?: ColumnHeader[]; columnFilters?: Map; + loading?: boolean; }): ComponentFixture { const fixture = TestBed.createComponent(TestableComponent); @@ -140,6 +143,10 @@ describe('data table', () => { fixture.componentInstance.selectableColumns = input.potentialColumns; } + if (input.loading !== undefined) { + 
fixture.componentInstance.loading = input.loading;
+    }
+
     fixture.componentInstance.columnFilters = input.columnFilters || new Map();
 
     sortDataBySpy = jasmine.createSpy();
@@ -159,6 +166,20 @@
     expect(dataTable).toBeTruthy();
   });
 
+  it('renders spinner when loading', () => {
+    const fixture = createComponent({loading: true});
+    fixture.detectChanges();
+    const spinner = fixture.debugElement.query(By.css('.loading'));
+    expect(spinner).toBeTruthy();
+  });
+
+  it('does not render spinner when not loading', () => {
+    const fixture = createComponent({loading: false});
+    fixture.detectChanges();
+    const spinner = fixture.debugElement.query(By.css('.loading'));
+    expect(spinner).toBeFalsy();
+  });
+
   it('emits sortDataBy event when header emits headerClicked event', () => {
     const fixture = createComponent({
       headers: [