Skip to content

Commit

Permalink
manager: improve error messages for exec failures (#2395)
Browse files Browse the repository at this point in the history
Summary:
This gives users a more explicit error message for common configuration
errors, like virtualenvs with `--system-site-packages` that mix and
match packages and binaries across environments.

Test Plan:
Automated tests cover changes to `manager.py`. For notebook changes,
build the Pip package, and install it and `IPython` to a new virtualenv.
From an IPython shell, run through all the cases:

```
In [1]: %load_ext tensorboard
In [2]: import os
In [3]: os.environ["PATH"] = ""
In [4]: %tensorboard --logdir logs
Launching TensorBoard...
ERROR: Could not find `tensorboard`. Please ensure that your PATH
contains an executable `tensorboard` program, or explicitly specify
the path to a TensorBoard binary by setting the `TENSORBOARD_BINARY`
environment variable.
In [5]: os.environ["TENSORBOARD_BINARY"] = "/not/likely"
In [6]: %tensorboard --logdir logs
Launching TensorBoard...
ERROR: Could not find '/not/likely' (set by the `TENSORBOARD_BINARY`
environment variable). Please ensure that your PATH contains an
executable `tensorboard` program, or explicitly specify the path to a
TensorBoard binary by setting the `TENSORBOARD_BINARY` environment
variable.
In [7]: os.environ["TENSORBOARD_BINARY"] = "."  # EACCES
In [8]: %tensorboard --logdir logs
Launching TensorBoard...
ERROR: Failed to start '.' (set by the `TENSORBOARD_BINARY`
environment variable): [Errno 13] Permission denied: '.'
```

wchargin-branch: manager-execfailed
  • Loading branch information
wchargin authored Jun 28, 2019
1 parent ff4103d commit 1065ddc
Show file tree
Hide file tree
Showing 3 changed files with 69 additions and 3 deletions.
21 changes: 18 additions & 3 deletions tensorboard/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,7 +325,7 @@ def get_all():
return results


# The following four types enumerate the possible return values of the
# The following five types enumerate the possible return values of the
# `start` function.

# Indicates that a call to `start` was compatible with an existing
Expand All @@ -351,6 +351,19 @@ def get_all():
),
)

# Indicates that a call to `start` failed to invoke the subprocess.
#
# If the TensorBoard executable was chosen via the `TENSORBOARD_BINARY`
# environment variable, then the `explicit_binary` field contains the
# path to that binary; otherwise, the field is `None`.
StartExecFailed = collections.namedtuple(
"StartExecFailed",
(
"os_error", # `OSError` due to `Popen` invocation
"explicit_binary", # `str` or `None`; see type-level comment
),
)

# Indicates that a call to `start` launched a TensorBoard process, but
# that process neither exited nor wrote its info file within the allowed
# timeout period. The process may still be running under the included
Expand Down Expand Up @@ -397,13 +410,15 @@ def start(arguments, timeout=datetime.timedelta(seconds=60)):
(stdout_fd, stdout_path) = tempfile.mkstemp(prefix=".tensorboard-stdout-")
(stderr_fd, stderr_path) = tempfile.mkstemp(prefix=".tensorboard-stderr-")
start_time_seconds = time.time()
tensorboard_binary = os.environ.get("TENSORBOARD_BINARY", "tensorboard")
explicit_tb = os.environ.get("TENSORBOARD_BINARY", None)
try:
p = subprocess.Popen(
[tensorboard_binary] + arguments,
["tensorboard" if explicit_tb is None else explicit_tb] + arguments,
stdout=stdout_fd,
stderr=stderr_fd,
)
except OSError as e:
return StartExecFailed(os_error=e, explicit_binary=explicit_tb)
finally:
os.close(stdout_fd)
os.close(stderr_fd)
Expand Down
27 changes: 27 additions & 0 deletions tensorboard/manager_e2e_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -353,6 +353,33 @@ def test_tensorboard_binary_environment_variable(self):
)
self.assertEqual(manager.get_all(), [])

def test_exec_failure_with_explicit_binary(self):
path = os.path.join(".", "non", "existent")
environ = {"TENSORBOARD_BINARY": path}
environ_patcher = mock.patch.dict(os.environ, environ)
environ_patcher.start()
self.addCleanup(environ_patcher.stop)

start_result = manager.start(["--logdir=./logs", "--port=0"])
self.assertIsInstance(start_result, manager.StartExecFailed)
self.assertEqual(start_result.os_error.errno, errno.ENOENT)
self.assertEqual(start_result.explicit_binary, path)

def test_exec_failure_with_no_explicit_binary(self):
if os.name == "nt":
# Can't use ENOENT without an absolute path (it's not treated as
# an exec failure).
self.skipTest("Not clear how to trigger this case on Windows.")
environ = {"PATH": "nope"}
environ_patcher = mock.patch.dict(os.environ, environ)
environ_patcher.start()
self.addCleanup(environ_patcher.stop)

start_result = manager.start(["--logdir=./logs", "--port=0"])
self.assertIsInstance(start_result, manager.StartExecFailed)
self.assertEqual(start_result.os_error.errno, errno.ENOENT)
self.assertIs(start_result.explicit_binary, None)


if __name__ == "__main__":
tf.test.main()
24 changes: 24 additions & 0 deletions tensorboard/notebook.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,10 @@
from __future__ import print_function

import datetime
import errno
import shlex
import sys
import textwrap
import time

from tensorboard import manager
Expand Down Expand Up @@ -191,6 +193,28 @@ def format_stream(name, value):
)
print_or_update(message)

elif isinstance(start_result, manager.StartExecFailed):
the_tensorboard_binary = (
"%r (set by the `TENSORBOARD_BINARY` environment variable)"
% (start_result.explicit_binary,)
if start_result.explicit_binary is not None
else "`tensorboard`"
)
if start_result.os_error.errno == errno.ENOENT:
message = (
"ERROR: Could not find %s. Please ensure that your PATH contains "
"an executable `tensorboard` program, or explicitly specify the path "
"to a TensorBoard binary by setting the `TENSORBOARD_BINARY` "
"environment variable."
% (the_tensorboard_binary,)
)
else:
message = (
"ERROR: Failed to start %s: %s"
% (the_tensorboard_binary, start_result.os_error)
)
print_or_update(textwrap.fill(message))

elif isinstance(start_result, manager.StartTimedOut):
message = (
"ERROR: Timed out waiting for TensorBoard to start. "
Expand Down

0 comments on commit 1065ddc

Please sign in to comment.