Simplify the logic to initialize FFmpeg

Differential Revision: D50193749 Pull Request resolved: pytorch#3650
hwangjeff · Oct 12, 2023 · f62367a · f62367a
1 parent d947dee
commit f62367a
Show file tree

Hide file tree

Showing 14 changed files with 151 additions and 205 deletions.
diff --git a/src/libtorchaudio/pybind/pybind.cpp b/src/libtorchaudio/pybind/pybind.cpp
@@ -8,7 +8,6 @@ PYBIND11_MODULE(_torchaudio, m) {
   m.def("is_rir_available", &is_rir_available, "");
   m.def("is_align_available", &is_align_available, "");
   m.def("cuda_version", &cuda_version, "");
-  m.def("find_avutil", &find_avutil, "");
 }
 
 } // namespace

diff --git a/src/libtorchaudio/utils.cpp b/src/libtorchaudio/utils.cpp
@@ -31,10 +31,4 @@ c10::optional<int64_t> cuda_version() {
 #endif
 }
 
-int find_avutil(const char* name) {
-  auto lib = at::DynamicLibrary{name};
-  auto avutil_version = (unsigned (*)())(lib.sym("avutil_version"));
-  return static_cast<int>(avutil_version() >> 16);
-}
-
 } // namespace torchaudio
diff --git a/src/libtorchaudio/utils.h b/src/libtorchaudio/utils.h
@@ -5,5 +5,4 @@ namespace torchaudio {
 bool is_rir_available();
 bool is_align_available();
 c10::optional<int64_t> cuda_version();
-int find_avutil(const char* name);
 } // namespace torchaudio
diff --git a/src/torchaudio/_backend/utils.py b/src/torchaudio/_backend/utils.py
@@ -4,7 +4,7 @@
 
 import torch
 
-from torchaudio._extension import _FFMPEG_EXT, _SOX_INITIALIZED
+from torchaudio._extension import _SOX_INITIALIZED, lazy_import_ffmpeg_ext
 
 from . import soundfile_backend
 
@@ -18,7 +18,7 @@
 @lru_cache(None)
 def get_available_backends() -> Dict[str, Type[Backend]]:
     backend_specs: Dict[str, Type[Backend]] = {}
-    if _FFMPEG_EXT is not None:
+    if lazy_import_ffmpeg_ext().is_available():
         backend_specs["ffmpeg"] = FFmpegBackend
     if _SOX_INITIALIZED:
         backend_specs["sox"] = SoXBackend

diff --git a/src/torchaudio/_extension/__init__.py b/src/torchaudio/_extension/__init__.py
@@ -4,11 +4,15 @@
 
 from torchaudio._internal.module_utils import eval_env, fail_with_message, is_module_available, no_op
 
-try:
-    from .fb import _init_ffmpeg
-except ImportError:
-    from .utils import _init_ffmpeg
-from .utils import _check_cuda_version, _fail_since_no_ffmpeg, _fail_since_no_sox, _init_dll_path, _init_sox, _load_lib
+from .utils import (
+    _check_cuda_version,
+    _fail_since_no_sox,
+    _init_dll_path,
+    _init_ffmpeg,
+    _init_sox,
+    _LazyImporter,
+    _load_lib,
+)
 
 _LG = logging.getLogger(__name__)
 
@@ -19,12 +23,11 @@
 # https://github.com/pytorch/builder/blob/e2e4542b8eb0bdf491214451a1a4128bd606cce2/test/smoke_test/smoke_test.py#L80
 __all__ = [
     "fail_if_no_sox",
-    "fail_if_no_ffmpeg",
     "_check_cuda_version",
     "_IS_TORCHAUDIO_EXT_AVAILABLE",
     "_IS_RIR_AVAILABLE",
     "_SOX_INITIALIZED",
-    "_FFMPEG_EXT",
+    "lazy_import_ffmpeg_ext",
 ]
 
 
@@ -81,25 +84,16 @@
     fail_if_no_sox = no_op if _SOX_INITIALIZED else _fail_since_no_sox
 
 
-# Initialize FFmpeg-related features
 _FFMPEG_EXT = None
-_USE_FFMPEG = eval_env("TORCHAUDIO_USE_FFMPEG", True)
-if _USE_FFMPEG and _IS_TORCHAUDIO_EXT_AVAILABLE:
-    try:
-        _FFMPEG_EXT = _init_ffmpeg()
-    except Exception:
-        # The initialization of FFmpeg extension will fail if supported FFmpeg
-        # libraries are not found in the system.
-        # Since the rest of the torchaudio works without it, we do not report the
-        # error here.
-        # The error will be raised when user code attempts to use these features.
-        _LG.debug("Failed to initialize ffmpeg bindings", exc_info=True)
 
 
-if _USE_FFMPEG:
-    fail_if_no_ffmpeg = _fail_since_no_ffmpeg if _FFMPEG_EXT is None else no_op
-else:
-    fail_if_no_ffmpeg = fail_with_message("requires ffmpeg extension, but it is disabled. (TORCHAUDIO_USE_FFMPEG=0)")
+def lazy_import_ffmpeg_ext():
+    """Return the shared lazy handle to the FFmpeg extension.
+
+    The handle is a ``_LazyImporter`` wrapping ``_init_ffmpeg``. It is created
+    on the first call, stored in the module-level ``_FFMPEG_EXT`` and reused by
+    every later call.
+    """
+    global _FFMPEG_EXT
+    if _FFMPEG_EXT is not None:
+        return _FFMPEG_EXT
+    _FFMPEG_EXT = _LazyImporter("_torchaudio_ffmpeg", _init_ffmpeg)
+    return _FFMPEG_EXT
 
 
 fail_if_no_rir = (

diff --git a/src/torchaudio/_extension/utils.py b/src/torchaudio/_extension/utils.py
@@ -5,17 +5,14 @@
 Anything that depends on external state should happen in __init__.py
 """
 
-
 import importlib
 import logging
 import os
-import platform
-import warnings
+import types
 from functools import wraps
 from pathlib import Path
 
 import torch
-import torchaudio
 
 _LG = logging.getLogger(__name__)
 _LIB_DIR = Path(__file__).parent.parent / "lib"
@@ -62,7 +59,6 @@ def _load_lib(lib: str) -> bool:
     if not path.exists():
         return False
     torch.ops.load_library(path)
-    torch.classes.load_library(path)
     return True
 
 
@@ -78,94 +74,98 @@ def _init_sox():
     atexit.register(torch.ops.torchaudio.sox_effects_shutdown_sox_effects)
 
 
-def _try_access_avutil(ffmpeg_ver):
-    libname_template = {
-        "Linux": "libavutil.so.{ver}",
-        "Darwin": "libavutil.{ver}.dylib",
-        "Windows": "avutil-{ver}.dll",
-    }[platform.system()]
-    avutil_ver = {"6": 58, "5": 57, "4": 56}[ffmpeg_ver]
-    libavutil = libname_template.format(ver=avutil_ver)
-    torchaudio.lib._torchaudio.find_avutil(libavutil)
-
-
-def _find_versionsed_ffmpeg_extension(ffmpeg_ver: str):
-    _LG.debug("Attempting to load FFmpeg version %s.", ffmpeg_ver)
-
-    library = f"libtorchaudio_ffmpeg{ffmpeg_ver}"
-    extension = f"_torchaudio_ffmpeg{ffmpeg_ver}"
-
-    if not _get_lib_path(extension).exists():
-        raise RuntimeError(f"FFmpeg {ffmpeg_ver} extension is not available.")
+_FFMPEG_VERS = ["6", "5", "4", ""]
 
-    if ffmpeg_ver:
-        # A simple check for FFmpeg availability.
-        # This is not technically sufficient as other libraries could be missing,
-        # but usually this is sufficient.
-        #
-        # Note: the reason why this check is performed is because I don't know
-        # if the next `_load_lib` (which calls `ctypes.CDLL` under the hood),
-        # could leak handle to shared libraries of dependencies, in case it fails.
-        #
-        # i.e. If the `ctypes.CDLL("foo")` fails because one of `foo`'s dependency
-        # does not exist while `foo` and some other dependencies exist, is it guaranteed
-        # that none-of them are kept in memory after the failure??
-        _try_access_avutil(ffmpeg_ver)
 
-    _load_lib(library)
+def _find_versionsed_ffmpeg_extension(version: str):
+    """Load the FFmpeg integration for the given FFmpeg version suffix.
+
+    Args:
+        version: Suffix appended to the extension and library names. An empty
+            string selects the unsuffixed names.
+
+    Returns:
+        The imported ``torchaudio.lib._torchaudio_ffmpeg{version}`` module.
+
+    Raises:
+        RuntimeError: If no import spec is found for the extension module.
+    """
+    # `import importlib` alone does not guarantee that the `importlib.util`
+    # submodule is loaded, so import it explicitly before using `find_spec`.
+    # Without this, an AttributeError here would be swallowed by the caller's
+    # broad `except Exception` and every version would look unavailable.
+    import importlib.util
+
+    _LG.debug("Attempting to load FFmpeg%s", version)
 
-    _LG.debug("Found FFmpeg version %s.", ffmpeg_ver)
-    return importlib.import_module(f"torchaudio.lib.{extension}")
+    ext = f"torchaudio.lib._torchaudio_ffmpeg{version}"
+    lib = f"libtorchaudio_ffmpeg{version}"
 
+    if not importlib.util.find_spec(ext):
+        raise RuntimeError(f"FFmpeg{version} extension is not available.")
 
-_FFMPEG_VERS = ["6", "5", "4", ""]
+    _load_lib(lib)
+    return importlib.import_module(ext)
 
 
-def _find_ffmpeg_extension(ffmpeg_vers, show_error):
-    logger = _LG.error if show_error else _LG.debug
+def _find_ffmpeg_extension(ffmpeg_vers):
+    """Return the first FFmpeg extension that loads, trying versions in order.
+
+    Args:
+        ffmpeg_vers: Iterable of FFmpeg version suffixes to try.
+
+    Returns:
+        The module returned by ``_find_versionsed_ffmpeg_extension`` for the
+        first version that loads without raising.
+
+    Raises:
+        ImportError: If every version fails to load. The individual failures
+            are only logged at DEBUG level.
+    """
     for ffmpeg_ver in ffmpeg_vers:
         try:
             return _find_versionsed_ffmpeg_extension(ffmpeg_ver)
         except Exception:
-            logger("Failed to load FFmpeg %s extension.", ffmpeg_ver, exc_info=True)
+            _LG.debug("Failed to load FFmpeg%s extension.", ffmpeg_ver, exc_info=True)
             continue
-    raise ImportError(f"Failed to intialize FFmpeg extension. Tried versions: {ffmpeg_vers}")
-
+    raise ImportError(
+        f"Failed to initialize FFmpeg extension. Tried versions: {ffmpeg_vers}. "
+        "Enable DEBUG logging to see more details about the error."
+    )
 
-def _find_available_ffmpeg_ext():
-    ffmpeg_vers = ["6", "5", "4", ""]
-    return [v for v in ffmpeg_vers if _get_lib_path(f"_torchaudio_ffmpeg{v}").exists()]
 
+def _get_ffmpeg_versions():
+    """Return the FFmpeg version suffixes to try, honoring the user override.
+
+    Returns:
+        ``_FFMPEG_VERS`` when ``TORCHAUDIO_USE_FFMPEG_VERSION`` is unset,
+        otherwise a single-element list holding the requested version.
+
+    Raises:
+        ValueError: If the environment variable is set to a value that is not
+            in ``_FFMPEG_VERS``.
+    """
+    ffmpeg_vers = _FFMPEG_VERS
+    ffmpeg_ver = os.environ.get("TORCHAUDIO_USE_FFMPEG_VERSION")
+    # No user override: hand back the default list unchanged.
+    if ffmpeg_ver is None:
+        return ffmpeg_vers
+    if ffmpeg_ver in ffmpeg_vers:
+        return [ffmpeg_ver]
+    raise ValueError(
+        f"The FFmpeg version '{ffmpeg_ver}' (read from TORCHAUDIO_USE_FFMPEG_VERSION) "
+        f"is not one of supported values. Possible values are {ffmpeg_vers}"
+    )
 
-def _init_ffmpeg(show_error=False):
-    ffmpeg_vers = _find_available_ffmpeg_ext()
-    if not ffmpeg_vers:
-        raise RuntimeError(
-            # fmt: off
-            "TorchAudio is not built with FFmpeg integration. "
-            "Please build torchaudio with USE_FFMPEG=1."
-            # fmt: on
-        )
 
-    # User override
-    if ffmpeg_ver := os.environ.get("TORCHAUDIO_USE_FFMPEG_VERSION"):
-        if ffmpeg_vers == [""]:
-            warnings.warn("TorchAudio is built in single FFmpeg mode. TORCHAUDIO_USE_FFMPEG_VERSION is ignored.")
-        else:
-            if ffmpeg_ver not in ffmpeg_vers:
-                raise ValueError(
-                    f"The FFmpeg version {ffmpeg_ver} (read from TORCHAUDIO_USE_FFMPEG_VERSION) "
-                    f"is not available. Available versions are {[v for v in ffmpeg_vers if v]}"
-                )
-            ffmpeg_vers = [ffmpeg_ver]
-
-    ext = _find_ffmpeg_extension(ffmpeg_vers, show_error)
+def _init_ffmpeg():
+    """Find, initialize and return the FFmpeg extension module.
+
+    The candidate versions come from ``_get_ffmpeg_versions``. After calling
+    the extension's ``init()``, its log level is lowered to 8 if it was higher.
+    """
+    ext = _find_ffmpeg_extension(_get_ffmpeg_versions())
     ext.init()
     if ext.get_log_level() > 8:
         ext.set_log_level(8)
     return ext
 
 
+class _LazyImporter(types.ModuleType):
+    """Module-like placeholder that imports the real module on first use.
+
+    Args:
+        name: Name given to this placeholder module object.
+        import_func: Zero-argument callable that imports and returns the real
+            module. It is not called again once it has succeeded; if it raises,
+            the next access calls it again.
+    """
+
+    def __init__(self, name, import_func):
+        super().__init__(name)
+        self.import_func = import_func
+        self.module = None
+
+    # Note:
+    # `__getattr__` is only consulted when normal attribute lookup fails.
+    # `_import_once` copies the real module's attributes into `self.__dict__`,
+    # so after a successful import this method is not called again for
+    # attributes that the real module defines.
+    def __getattr__(self, item):
+        self._import_once()
+        return getattr(self.module, item)
+
+    def __repr__(self):
+        if self.module is None:
+            # Use `__name__`, which `ModuleType.__init__` sets. There is no
+            # `name` attribute, so `self.name` would fall into `__getattr__`
+            # and trigger the import just to print the placeholder.
+            return f"<module '{self.__module__}.{self.__class__.__name__}(\"{self.__name__}\")'>"
+        return repr(self.module)
+
+    def __dir__(self):
+        self._import_once()
+        return dir(self.module)
+
+    def _import_once(self):
+        """Run ``import_func`` unless a module has already been loaded."""
+        if self.module is None:
+            self.module = self.import_func()
+            # Note:
+            # By attaching the module attributes to self,
+            # module attributes are directly accessible.
+            # This allows to avoid calling __getattr__ for every attribute access.
+            self.__dict__.update(self.module.__dict__)
+
+    def is_available(self):
+        """Return True if the real module can be imported, False if importing raises."""
+        try:
+            self._import_once()
+        except Exception:
+            return False
+        return True
+
+
 def _init_dll_path():
     # On Windows Python-3.8+ has `os.add_dll_directory` call,
     # which is called to configure dll search path.
@@ -182,6 +182,8 @@ def _init_dll_path():
 
 
 def _check_cuda_version():
+    import torchaudio.lib._torchaudio
+
     version = torchaudio.lib._torchaudio.cuda_version()
     if version is not None and torch.version.cuda is not None:
         version_str = str(version)
@@ -214,22 +216,3 @@ def wrapped(*_args, **_kwargs):
         return func(*_args, **_kwargs)
 
     return wrapped
-
-
-def _fail_since_no_ffmpeg(func):
-    @wraps(func)
-    def wrapped(*_args, **_kwargs):
-        try:
-            # Note:
-            # We run _init_ffmpeg again just to show users the stacktrace.
-            # _init_ffmpeg would not succeed here.
-            _init_ffmpeg(show_error=True)
-        except Exception as err:
-            raise RuntimeError(
-                f"{func.__name__} requires FFmpeg extension which is not available. "
-                "Please refer to the stacktrace above for how to resolve this."
-            ) from err
-        # This should not happen in normal execution, but just in case.
-        return func(*_args, **_kwargs)
-
-    return wrapped
diff --git a/src/torchaudio/io/_playback.py b/src/torchaudio/io/_playback.py
@@ -15,7 +15,6 @@
 }
 
 
-@torchaudio._extension.fail_if_no_ffmpeg
 def play_audio(
     waveform: torch.Tensor,
     sample_rate: Optional[float],
@@ -57,7 +56,9 @@ def play_audio(
     time, num_channels = waveform.size()
     if num_channels > 2:
         warnings.warn(
-            f"Expected up to 2 channels, got {num_channels} channels instead. Only the first 2 channels will be played."
+            f"Expected up to 2 channels, got {num_channels} channels instead. "
+            "Only the first 2 channels will be played.",
+            stacklevel=2,
         )
 
     # Write to speaker device

diff --git a/src/torchaudio/io/_stream_reader.py b/src/torchaudio/io/_stream_reader.py
@@ -9,11 +9,7 @@
 import torchaudio
 from torch.utils._pytree import tree_map
 
-if torchaudio._extension._FFMPEG_EXT is not None:
-    _StreamReader = torchaudio._extension._FFMPEG_EXT.StreamReader
-    _StreamReaderBytes = torchaudio._extension._FFMPEG_EXT.StreamReaderBytes
-    _StreamReaderFileObj = torchaudio._extension._FFMPEG_EXT.StreamReaderFileObj
-
+ffmpeg_ext = torchaudio._extension.lazy_import_ffmpeg_ext()
 
 __all__ = [
     "StreamReader",
@@ -442,7 +438,6 @@ def decorator(obj):
 OutputStreamTypes = TypeVar("OutputStream", bound=OutputStream)
 
 
-@torchaudio._extension.fail_if_no_ffmpeg
 class StreamReader:
     """Fetch and decode audio/video streams chunk by chunk.
 
@@ -524,11 +519,11 @@ def __init__(
     ):
         self.src = src
         if isinstance(src, bytes):
-            self._be = _StreamReaderBytes(src, format, option, buffer_size)
+            self._be = ffmpeg_ext.StreamReaderBytes(src, format, option, buffer_size)
         elif hasattr(src, "read"):
-            self._be = _StreamReaderFileObj(src, format, option, buffer_size)
+            self._be = ffmpeg_ext.StreamReaderFileObj(src, format, option, buffer_size)
         else:
-            self._be = _StreamReader(os.path.normpath(src), format, option)
+            self._be = ffmpeg_ext.StreamReader(os.path.normpath(src), format, option)
 
         i = self._be.find_best_audio_stream()
         self._default_audio_stream = None if i < 0 else i