From 6d549586f2d4112e7b95294b24d01ece6bc8f043 Mon Sep 17 00:00:00 2001 From: David Soergel Date: Fri, 10 Apr 2020 14:50:42 -0400 Subject: [PATCH 1/9] Detect and skip corrupt GraphDefs --- tensorboard/BUILD | 1 + tensorboard/dataclass_compat.py | 19 ++++++++++++++----- tensorboard/dataclass_compat_test.py | 17 +++++++++++++++++ tensorboard/uploader/uploader_test.py | 22 ++++++++++++++++++---- 4 files changed, 50 insertions(+), 9 deletions(-) diff --git a/tensorboard/BUILD b/tensorboard/BUILD index 70884b2798..e9a756ad97 100644 --- a/tensorboard/BUILD +++ b/tensorboard/BUILD @@ -460,6 +460,7 @@ py_library( srcs_version = "PY2AND3", visibility = ["//visibility:public"], deps = [ + "@com_google_protobuf//:protobuf_python", "//tensorboard/compat:tensorflow", "//tensorboard/compat/proto:protos_all_py_pb2", "//tensorboard/plugins/audio:metadata", diff --git a/tensorboard/dataclass_compat.py b/tensorboard/dataclass_compat.py index 087ce42172..4ece50ae33 100644 --- a/tensorboard/dataclass_compat.py +++ b/tensorboard/dataclass_compat.py @@ -26,6 +26,8 @@ from __future__ import print_function + +from google.protobuf import message from tensorboard.backend import process_graph from tensorboard.compat.proto import event_pb2 from tensorboard.compat.proto import graph_pb2 @@ -38,6 +40,9 @@ from tensorboard.plugins.scalar import metadata as scalars_metadata from tensorboard.plugins.text import metadata as text_metadata from tensorboard.util import tensor_util +from tensorboard.util import tb_logging + +logger = tb_logging.get_logger() def migrate_event(event, experimental_filter_graph=False): @@ -72,11 +77,15 @@ def _migrate_graph_event(old_event, experimental_filter_graph=False): # TODO(@davidsoergel): Move this stopgap to a more appropriate place. if experimental_filter_graph: - graph_def = graph_pb2.GraphDef().FromString(graph_bytes) - # Use the default filter parameters: - # limit_attr_size=1024, large_attrs_key="_too_large_attrs" - process_graph.prepare_graph_for_ui(graph_def) - graph_bytes = graph_def.SerializeToString() + try: + graph_def = graph_pb2.GraphDef().FromString(graph_bytes) + # Use the default filter parameters: + # limit_attr_size=1024, large_attrs_key="_too_large_attrs" + process_graph.prepare_graph_for_ui(graph_def) + graph_bytes = graph_def.SerializeToString() + except message.DecodeError: + logger.warning("Could not parse GraphDef. Skipping.") + return (old_event,) value.tensor.CopyFrom(tensor_util.make_tensor_proto([graph_bytes])) value.metadata.plugin_data.plugin_name = graphs_metadata.PLUGIN_NAME diff --git a/tensorboard/dataclass_compat_test.py b/tensorboard/dataclass_compat_test.py index 89e1ed94cc..c83c055b2f 100644 --- a/tensorboard/dataclass_compat_test.py +++ b/tensorboard/dataclass_compat_test.py @@ -253,6 +253,23 @@ def test_graph_def_experimental_filter_graph(self): self.assertProtoEquals(expected_graph_def, new_graph_def) + def test_graph_def_experimental_filter_graph_corrupt(self): + # Simulate legacy graph event with an unparseable graph + old_event = event_pb2.Event() + old_event.step = 0 + old_event.wall_time = 456.75 + # Careful: some proto parsers choke on byte arrays filled with 0, but + # others don't (silently producing an empty proto, I guess). + # Thus `old_event.graph_def = bytes(1024)` is an unreliable example. 
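+ # A short arbitrary byte string such as b"bogus" should fail reliably:
+ # its leading bytes are read as a length-delimited field whose declared
+ # length exceeds the remaining payload, so parsing raises DecodeError.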
+ old_event.graph_def = b"bogus" + + new_events = self._migrate_event( + old_event, experimental_filter_graph=True + ) + # _migrate_event emits both the original event and the migrated event, + # but here there is no migrated event becasue the graph was unparseable. + self.assertLen(new_events, 1) + if __name__ == "__main__": tf.test.main() diff --git a/tensorboard/uploader/uploader_test.py b/tensorboard/uploader/uploader_test.py index 78f83fb12d..35928b6b96 100644 --- a/tensorboard/uploader/uploader_test.py +++ b/tensorboard/uploader/uploader_test.py @@ -41,6 +41,7 @@ from tensorboard.uploader import logdir_loader from tensorboard.uploader import util from tensorboard.compat.proto import event_pb2 +from tensorboard.compat.proto import graph_pb2 from tensorboard.compat.proto import summary_pb2 from tensorboard.plugins.histogram import summary_v2 as histogram_v2 from tensorboard.plugins.graph import metadata as graphs_metadata @@ -50,6 +51,19 @@ from tensorboard.util import test_util as tb_test_util +def _create_example_graph(test_attr_size): + graph_def = graph_pb2.GraphDef() + graph_def.node.add(name="alice", op="Person") + graph_def.node.add(name="bob", op="Person") + + graph_def.node[1].attr["small"].s = b"small_attr_value" + graph_def.node[1].attr["large"].s = b"l" * test_attr_size + graph_def.node.add( + name="friendship", op="Friendship", input=["alice", "bob"] + ) + return graph_def.SerializeToString() + + class AbortUploadError(Exception): """Exception used in testing to abort the upload process.""" @@ -264,7 +278,7 @@ def test_start_uploading_graphs(self): # Of course a real Event stream will never produce the same Event twice, # but is this test context it's fine to reuse this one. - graph_event = event_pb2.Event(graph_def=bytes(950)) + graph_event = event_pb2.Event(graph_def=_create_example_graph(950)) mock_logdir_loader = mock.create_autospec(logdir_loader.LogdirLoader) mock_logdir_loader.get_run_events.side_effect = [ @@ -307,7 +321,7 @@ def test_upload_skip_large_blob(self): ) uploader.create_experiment() - graph_event = event_pb2.Event(graph_def=bytes(950)) + graph_event = event_pb2.Event(graph_def=_create_example_graph(950)) mock_logdir_loader = mock.create_autospec(logdir_loader.LogdirLoader) mock_logdir_loader.get_run_events.side_effect = [ @@ -342,7 +356,7 @@ def test_upload_server_error(self): # Of course a real Event stream will never produce the same Event twice, # but is this test context it's fine to reuse this one. 
- graph_event = event_pb2.Event(graph_def=bytes(950)) + graph_event = event_pb2.Event(graph_def=_create_example_graph(950)) mock_logdir_loader = mock.create_autospec(logdir_loader.LogdirLoader) mock_logdir_loader.get_run_events.side_effect = [ @@ -383,7 +397,7 @@ def test_upload_same_graph_twice(self): ) uploader.create_experiment() - graph_event = event_pb2.Event(graph_def=bytes(950)) + graph_event = event_pb2.Event(graph_def=_create_example_graph(950)) mock_logdir_loader = mock.create_autospec(logdir_loader.LogdirLoader) mock_logdir_loader.get_run_events.side_effect = [ From b7801697bfe28e84f8936ca570a6b330585bf862 Mon Sep 17 00:00:00 2001 From: David Soergel Date: Fri, 10 Apr 2020 14:51:45 -0400 Subject: [PATCH 2/9] lint --- tensorboard/dataclass_compat.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tensorboard/dataclass_compat.py b/tensorboard/dataclass_compat.py index 4ece50ae33..58f8d96bd3 100644 --- a/tensorboard/dataclass_compat.py +++ b/tensorboard/dataclass_compat.py @@ -26,7 +26,6 @@ from __future__ import print_function - from google.protobuf import message from tensorboard.backend import process_graph from tensorboard.compat.proto import event_pb2 From 141347f976d09cb27dabd613816de3e20e9d6d18 Mon Sep 17 00:00:00 2001 From: David Soergel Date: Fri, 10 Apr 2020 15:33:26 -0400 Subject: [PATCH 3/9] Reviewer comments --- tensorboard/dataclass_compat.py | 13 ++++++++----- tensorboard/dataclass_compat_test.py | 1 + tensorboard/uploader/uploader_test.py | 18 +++++++++++++----- 3 files changed, 22 insertions(+), 10 deletions(-) diff --git a/tensorboard/dataclass_compat.py b/tensorboard/dataclass_compat.py index 58f8d96bd3..a9b001a381 100644 --- a/tensorboard/dataclass_compat.py +++ b/tensorboard/dataclass_compat.py @@ -78,13 +78,16 @@ def _migrate_graph_event(old_event, experimental_filter_graph=False): if experimental_filter_graph: try: graph_def = graph_pb2.GraphDef().FromString(graph_bytes) - # Use the default filter parameters: - # limit_attr_size=1024, large_attrs_key="_too_large_attrs" - process_graph.prepare_graph_for_ui(graph_def) - graph_bytes = graph_def.SerializeToString() except message.DecodeError: - logger.warning("Could not parse GraphDef. Skipping.") + logger.warning( + "Could not parse GraphDef of size %d. Skipping.", + len(graph_bytes), + ) return (old_event,) + # Use the default filter parameters: + # limit_attr_size=1024, large_attrs_key="_too_large_attrs" + process_graph.prepare_graph_for_ui(graph_def) + graph_bytes = graph_def.SerializeToString() value.tensor.CopyFrom(tensor_util.make_tensor_proto([graph_bytes])) value.metadata.plugin_data.plugin_name = graphs_metadata.PLUGIN_NAME diff --git a/tensorboard/dataclass_compat_test.py b/tensorboard/dataclass_compat_test.py index c83c055b2f..5985287826 100644 --- a/tensorboard/dataclass_compat_test.py +++ b/tensorboard/dataclass_compat_test.py @@ -269,6 +269,7 @@ def test_graph_def_experimental_filter_graph_corrupt(self): # _migrate_event emits both the original event and the migrated event, # but here there is no migrated event becasue the graph was unparseable. 
self.assertLen(new_events, 1) + self.assertProtoEquals(new_events[0], old_event) if __name__ == "__main__": diff --git a/tensorboard/uploader/uploader_test.py b/tensorboard/uploader/uploader_test.py index 35928b6b96..8184392152 100644 --- a/tensorboard/uploader/uploader_test.py +++ b/tensorboard/uploader/uploader_test.py @@ -51,7 +51,7 @@ from tensorboard.util import test_util as tb_test_util -def _create_example_graph(test_attr_size): +def _create_example_graph_bytes(test_attr_size): graph_def = graph_pb2.GraphDef() graph_def.node.add(name="alice", op="Person") graph_def.node.add(name="bob", op="Person") @@ -278,7 +278,9 @@ def test_start_uploading_graphs(self): # Of course a real Event stream will never produce the same Event twice, # but is this test context it's fine to reuse this one. - graph_event = event_pb2.Event(graph_def=_create_example_graph(950)) + graph_event = event_pb2.Event( + graph_def=_create_example_graph_bytes(950) + ) mock_logdir_loader = mock.create_autospec(logdir_loader.LogdirLoader) mock_logdir_loader.get_run_events.side_effect = [ @@ -321,7 +323,9 @@ def test_upload_skip_large_blob(self): ) uploader.create_experiment() - graph_event = event_pb2.Event(graph_def=_create_example_graph(950)) + graph_event = event_pb2.Event( + graph_def=_create_example_graph_bytes(950) + ) mock_logdir_loader = mock.create_autospec(logdir_loader.LogdirLoader) mock_logdir_loader.get_run_events.side_effect = [ @@ -356,7 +360,9 @@ def test_upload_server_error(self): # Of course a real Event stream will never produce the same Event twice, # but is this test context it's fine to reuse this one. - graph_event = event_pb2.Event(graph_def=_create_example_graph(950)) + graph_event = event_pb2.Event( + graph_def=_create_example_graph_bytes(950) + ) mock_logdir_loader = mock.create_autospec(logdir_loader.LogdirLoader) mock_logdir_loader.get_run_events.side_effect = [ @@ -397,7 +403,9 @@ def test_upload_same_graph_twice(self): ) uploader.create_experiment() - graph_event = event_pb2.Event(graph_def=_create_example_graph(950)) + graph_event = event_pb2.Event( + graph_def=_create_example_graph_bytes(950) + ) mock_logdir_loader = mock.create_autospec(logdir_loader.LogdirLoader) mock_logdir_loader.get_run_events.side_effect = [ From 44cceb07a0b4e0afeac59482d5014619b8f56c91 Mon Sep 17 00:00:00 2001 From: David Soergel Date: Fri, 10 Apr 2020 15:47:22 -0400 Subject: [PATCH 4/9] reviewer comment --- tensorboard/uploader/uploader_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorboard/uploader/uploader_test.py b/tensorboard/uploader/uploader_test.py index 8184392152..c720edaf5f 100644 --- a/tensorboard/uploader/uploader_test.py +++ b/tensorboard/uploader/uploader_test.py @@ -51,13 +51,13 @@ from tensorboard.util import test_util as tb_test_util -def _create_example_graph_bytes(test_attr_size): +def _create_example_graph_bytes(large_attr_size): graph_def = graph_pb2.GraphDef() graph_def.node.add(name="alice", op="Person") graph_def.node.add(name="bob", op="Person") graph_def.node[1].attr["small"].s = b"small_attr_value" - graph_def.node[1].attr["large"].s = b"l" * test_attr_size + graph_def.node[1].attr["large"].s = b"l" * large_attr_size graph_def.node.add( name="friendship", op="Friendship", input=["alice", "bob"] ) From 552d051e366262775569de5906a3a0c681c9531d Mon Sep 17 00:00:00 2001 From: David Soergel Date: Fri, 10 Apr 2020 16:26:33 -0400 Subject: [PATCH 5/9] BUILD lint --- tensorboard/BUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/tensorboard/BUILD b/tensorboard/BUILD index e9a756ad97..75aa85bc6d 100644 --- a/tensorboard/BUILD +++ b/tensorboard/BUILD @@ -460,7 +460,6 @@ py_library( srcs_version = "PY2AND3", visibility = ["//visibility:public"], deps = [ - "@com_google_protobuf//:protobuf_python", "//tensorboard/compat:tensorflow", "//tensorboard/compat/proto:protos_all_py_pb2", "//tensorboard/plugins/audio:metadata", @@ -468,6 +467,7 @@ py_library( "//tensorboard/plugins/image:metadata", "//tensorboard/plugins/scalar:metadata", "//tensorboard/util:tensor_util", + "@com_google_protobuf//:protobuf_python", ], ) From b6748cd04cf0867011c1f416832ad4584a9a76dd Mon Sep 17 00:00:00 2001 From: David Soergel Date: Mon, 13 Apr 2020 16:40:29 -0400 Subject: [PATCH 6/9] Catch RuntimeWarning when parsing GraphDef protos. --- tensorboard/dataclass_compat_test.py | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/tensorboard/dataclass_compat_test.py b/tensorboard/dataclass_compat_test.py index 5985287826..4002a11353 100644 --- a/tensorboard/dataclass_compat_test.py +++ b/tensorboard/dataclass_compat_test.py @@ -261,7 +261,7 @@ def test_graph_def_experimental_filter_graph_corrupt(self): # Careful: some proto parsers choke on byte arrays filled with 0, but # others don't (silently producing an empty proto, I guess). # Thus `old_event.graph_def = bytes(1024)` is an unreliable example. - old_event.graph_def = b"bogus" + old_event.graph_def = b"" new_events = self._migrate_event( old_event, experimental_filter_graph=True @@ -271,6 +271,27 @@ def test_graph_def_experimental_filter_graph_corrupt(self): self.assertLen(new_events, 1) self.assertProtoEquals(new_events[0], old_event) + def test_graph_def_experimental_filter_graph_empty(self): + # Simulate legacy graph event with an empty graph + old_event = event_pb2.Event() + old_event.step = 0 + old_event.wall_time = 456.75 + old_event.graph_def = b'' + + new_events = self._migrate_event( + old_event, experimental_filter_graph=True + ) + # _migrate_event emits both the original event and the migrated event. + # The migrated event contains a default empty GraphDef. 
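+ # (An empty byte string is valid protobuf wire data: it should decode to
+ # a GraphDef with all fields at their defaults rather than raise
+ # DecodeError, so the migration still runs.)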
+ self.assertLen(new_events, 2) + self.assertProtoEquals(new_events[0], old_event) + + new_event = new_events[1] + tensor = tensor_util.make_ndarray(new_event.summary.value[0].tensor) + new_graph_def_bytes = tensor[0] + new_graph_def = graph_pb2.GraphDef.FromString(new_graph_def_bytes) + self.assertProtoEquals(new_graph_def, graph_pb2.GraphDef()) + if __name__ == "__main__": tf.test.main() From e8eb6a52a033659f7ef477b7bba1c8e4c616c398 Mon Sep 17 00:00:00 2001 From: David Soergel Date: Mon, 13 Apr 2020 18:11:36 -0400 Subject: [PATCH 7/9] lint --- tensorboard/dataclass_compat_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorboard/dataclass_compat_test.py b/tensorboard/dataclass_compat_test.py index 4002a11353..cca3b8d696 100644 --- a/tensorboard/dataclass_compat_test.py +++ b/tensorboard/dataclass_compat_test.py @@ -276,7 +276,7 @@ def test_graph_def_experimental_filter_graph_empty(self): old_event = event_pb2.Event() old_event.step = 0 old_event.wall_time = 456.75 - old_event.graph_def = b'' + old_event.graph_def = b"" new_events = self._migrate_event( old_event, experimental_filter_graph=True From 1690e7d935206b8814c1776a95a7f46c37f47c04 Mon Sep 17 00:00:00 2001 From: David Soergel Date: Mon, 13 Apr 2020 21:41:50 -0400 Subject: [PATCH 8/9] Simplify test, add comments --- tensorboard/dataclass_compat_test.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/tensorboard/dataclass_compat_test.py b/tensorboard/dataclass_compat_test.py index cca3b8d696..30ef9dfa9a 100644 --- a/tensorboard/dataclass_compat_test.py +++ b/tensorboard/dataclass_compat_test.py @@ -286,11 +286,18 @@ def test_graph_def_experimental_filter_graph_empty(self): self.assertLen(new_events, 2) self.assertProtoEquals(new_events[0], old_event) + # We expect that parsing `b""` as a `GraphDef` produces the default + # 'empty' proto, just like `graph_pb2.GraphDef()`. Furthermore, we + # expect serializing that to produce `b""` again (as opposed to some + # other representation of the default proto, e.g. one in which default + # values are materialized). + # Here we extract the serialized graph_def from the migrated event, + # which is the result of such a deserialize/serialize cycle, to validate + # these expectations. new_event = new_events[1] tensor = tensor_util.make_ndarray(new_event.summary.value[0].tensor) new_graph_def_bytes = tensor[0] - new_graph_def = graph_pb2.GraphDef.FromString(new_graph_def_bytes) - self.assertProtoEquals(new_graph_def, graph_pb2.GraphDef()) + self.assertEqual(new_graph_def_bytes, b"") if __name__ == "__main__": From dfbc72f0404feb07bd499a480c0475d90d0d6ca1 Mon Sep 17 00:00:00 2001 From: David Soergel Date: Mon, 13 Apr 2020 21:43:36 -0400 Subject: [PATCH 9/9] update comment --- tensorboard/dataclass_compat_test.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorboard/dataclass_compat_test.py b/tensorboard/dataclass_compat_test.py index 30ef9dfa9a..ed7ceff7ad 100644 --- a/tensorboard/dataclass_compat_test.py +++ b/tensorboard/dataclass_compat_test.py @@ -294,6 +294,8 @@ def test_graph_def_experimental_filter_graph_empty(self): # Here we extract the serialized graph_def from the migrated event, # which is the result of such a deserialize/serialize cycle, to validate # these expectations. + # This also demonstrates that empty `GraphDefs` are not rejected or + # ignored. new_event = new_events[1] tensor = tensor_util.make_ndarray(new_event.summary.value[0].tensor) new_graph_def_bytes = tensor[0]
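
For reference, a minimal sketch of the end-to-end behavior these patches aim for. The module paths and the migrate_event signature are taken from the diffs above; the exact return shape (a sequence containing the original event, plus the migrated event when the graph parses) is inferred from the tests.

    from tensorboard import dataclass_compat
    from tensorboard.compat.proto import event_pb2
    from tensorboard.compat.proto import graph_pb2

    # Corrupt graph bytes: parsing fails, so only the original event is
    # passed through (the migrated graph summary is skipped).
    corrupt_event = event_pb2.Event(
        step=0, wall_time=456.75, graph_def=b"bogus"
    )
    events = dataclass_compat.migrate_event(
        corrupt_event, experimental_filter_graph=True
    )
    assert len(events) == 1

    # Well-formed graph bytes: both the original event and a migrated
    # summary event (carrying the filtered graph as a tensor) are emitted.
    graph_def = graph_pb2.GraphDef()
    graph_def.node.add(name="alice", op="Person")
    valid_event = event_pb2.Event(
        step=0, wall_time=456.75, graph_def=graph_def.SerializeToString()
    )
    events = dataclass_compat.migrate_event(
        valid_event, experimental_filter_graph=True
    )
    assert len(events) == 2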