From 579c189f54d2de7a04cb75d2478a075cd5724625 Mon Sep 17 00:00:00 2001 From: F-G Fernandez <26927750+frgfm@users.noreply.github.com> Date: Tue, 31 May 2022 12:48:01 +0200 Subject: [PATCH] style: Added black formatting & refactored configs into pyproject.toml (#58) * fix: Fixed collect_env nvidia smi * chore: Refactored config into pyproject.toml * ci: Updated dependency check * ci: Updated dep check * ci: Added CI job for black formatting * docs: Updated makefile * docs: Updated CONTRIBUTING * style: Applied black formatting * chore: Fixed pydocstyle version * ci: Fixed style checks * style: Made mypy stricter * style: Fixed typing * style: Fixed typing * refactor: Removed unused import --- .flake8 | 5 + .github/validate_deps.py | 44 ++++----- .github/validate_headers.py | 19 ++-- .github/verify_labels.py | 12 ++- .github/workflows/main.yml | 2 +- .github/workflows/style.yml | 22 ++++- CONTRIBUTING.md | 18 +++- Makefile | 2 + docs/source/conf.py | 47 +++++----- pyproject.toml | 114 +++++++++++++++++++++++ scripts/benchmark.py | 73 ++++++++++----- scripts/collect_env.py | 140 ++++++++++++++-------------- setup.cfg | 90 ------------------ setup.py | 2 +- tests/requirements.txt | 2 +- tests/test_crawler.py | 20 ++-- tests/test_utils.py | 14 +-- torchscan/crawler.py | 161 ++++++++++++++++++--------------- torchscan/modules/flops.py | 70 ++++++-------- torchscan/modules/macs.py | 38 ++++---- torchscan/modules/memory.py | 26 +++--- torchscan/modules/receptive.py | 38 +++++--- torchscan/process/memory.py | 4 +- torchscan/utils.py | 139 ++++++++++++++-------------- 24 files changed, 591 insertions(+), 511 deletions(-) create mode 100644 .flake8 create mode 100644 pyproject.toml delete mode 100644 setup.cfg diff --git a/.flake8 b/.flake8 new file mode 100644 index 0000000..4bbebd7 --- /dev/null +++ b/.flake8 @@ -0,0 +1,5 @@ +[flake8] +max-line-length = 120 +ignore = E203, E402, E265, F403, W503, W504, E731 +exclude = .github, .git, venv*, docs, build +per-file-ignores = **/__init__.py:F401 diff --git a/.github/validate_deps.py b/.github/validate_deps.py index f85831e..313b741 100644 --- a/.github/validate_deps.py +++ b/.github/validate_deps.py @@ -1,6 +1,7 @@ from pathlib import Path import requirements +import toml from requirements.requirement import Requirement # All req files to check @@ -8,10 +9,9 @@ "test": "tests/requirements.txt", "docs": "docs/requirements.txt", } -EXTRA_IGNORE = ["dev"] -def parse_deps(deps): +def check_deps(deps): reqs = {} for _dep in deps: lib, specs = _dep @@ -26,10 +26,10 @@ def get_conficts(setup_reqs, requirement_file): # Parse the deps from the requirements.txt folder = Path(__file__).parent.parent.absolute() req_deps = {} - with open(folder.joinpath(requirement_file), 'r') as f: + with open(folder.joinpath(requirement_file), "r") as f: _deps = [(req.name, req.specs) for req in requirements.parse(f)] - req_deps = parse_deps(_deps) + req_deps = check_deps(_deps) # Compare them assert len(req_deps) == len(setup_reqs) @@ -46,32 +46,18 @@ def main(): # Collect the one from setup.py folder = Path(__file__).parent.parent.absolute() - with open(folder.joinpath("setup.cfg"), 'r') as f: - setup = f.readlines() + toml_reqs = toml.load(folder.joinpath("pyproject.toml")) # install_requires - lines = setup[setup.index("install_requires =\n") + 1:] - lines = [_dep.strip() for _dep in lines[:lines.index("\n")]] - _reqs = [Requirement.parse(_line) for _line in lines] - install_requires = parse_deps([(req.name, req.specs) for req in _reqs]) + _reqs = [Requirement.parse(_line) for 
_line in toml_reqs["project"]["dependencies"]] + install_requires = check_deps([(req.name, req.specs) for req in _reqs]) # extras - extras_require = {} - lines = setup[setup.index("[options.extras_require]\n") + 1:] - lines = lines[:lines.index("\n")] - # Split each extra - extra_lines = [_line for _line in lines if str.isalpha(_line[0])] - extra_names = [_line.strip().replace("=", "").strip() for _line in extra_lines] - for current_extra, start_line, end_line in zip(extra_names, extra_lines, extra_lines[1:] + [None]): - if current_extra in EXTRA_IGNORE: - continue - _lines = [_dep for _dep in lines[lines.index(start_line) + 1:]] - if isinstance(end_line, str): - _lines = _lines[:_lines.index(end_line)] - # Remove comments - _lines = [_line.strip() for _line in _lines] - _reqs = [Requirement.parse(_line.strip()) for _line in _lines if not _line.strip().startswith("#")] - extras_require[current_extra] = parse_deps([(req.name, req.specs) for req in _reqs]) + extras_require = { + k: [Requirement.parse(_line) for _line in lines] + for k, lines in toml_reqs["project"]["optional-dependencies"].items() + } + extras_require = {k: check_deps([(req.name, req.specs) for req in _reqs]) for k, _reqs in extras_require.items()} # Resolve conflicts mismatches = {} @@ -82,11 +68,13 @@ def main(): # Display the results if any(len(mismatch) > 0 for mismatch in mismatches.values()): mismatch_str = "version specifiers mismatches:\n" - mismatch_str += '\n'.join( + mismatch_str += "\n".join( f"- {lib}: {setup} (from setup.cfg) | {reqs} (from {req_file})" - for req_file, issues in mismatches.items() for lib, setup, reqs in issues + for req_file, issues in mismatches.items() + for lib, setup, reqs in issues ) raise AssertionError(mismatch_str) + if __name__ == "__main__": main() diff --git a/.github/validate_headers.py b/.github/validate_headers.py index 8ef5d9d..57bddda 100644 --- a/.github/validate_headers.py +++ b/.github/validate_headers.py @@ -9,23 +9,16 @@ current_year = datetime.now().year year_options = [f"{current_year}"] + [f"{year}-{current_year}" for year in range(starting_year, current_year)] -copyright_notices = [ - [f"# Copyright (C) {year_str}, François-Guillaume Fernandez.\n"] - for year_str in year_options -] +copyright_notices = [[f"# Copyright (C) {year_str}, François-Guillaume Fernandez.\n"] for year_str in year_options] license_notice = [ "# This program is licensed under the Apache License version 2.\n", - "# See LICENSE or go to for full license details.\n" + "# See LICENSE or go to for full license details.\n", ] # Define all header options HEADERS = [ - shebang + [blank_line] + copyright_notice + [blank_line] + license_notice - for copyright_notice in copyright_notices -] + [ - copyright_notice + [blank_line] + license_notice - for copyright_notice in copyright_notices -] + shebang + [blank_line] + copyright_notice + [blank_line] + license_notice for copyright_notice in copyright_notices +] + [copyright_notice + [blank_line] + license_notice for copyright_notice in copyright_notices] IGNORED_FILES = ["version.py", "__init__.py"] @@ -38,7 +31,7 @@ def main(): # For every python file in the repository for folder in FOLDERS: - for source_path in Path(__file__).parent.parent.joinpath(folder).rglob('**/*.py'): + for source_path in Path(__file__).parent.parent.joinpath(folder).rglob("**/*.py"): if source_path.name not in IGNORED_FILES: # Parse header header_length = max(len(option) for option in HEADERS) @@ -50,7 +43,7 @@ def main(): break # Validate it if not any( - 
"".join(current_header[:min(len(option), len(current_header))]) == "".join(option) + "".join(current_header[: min(len(option), len(current_header))]) == "".join(option) for option in HEADERS ): invalid_files.append(source_path) diff --git a/.github/verify_labels.py b/.github/verify_labels.py index 5d109cb..415ddf0 100644 --- a/.github/verify_labels.py +++ b/.github/verify_labels.py @@ -35,8 +35,8 @@ "topic: ci", } -GH_ORG = 'frgfm' -GH_REPO = 'torch-scan' +GH_ORG = "frgfm" +GH_REPO = "torch-scan" def query_repo(cmd: str, *, accept) -> Any: @@ -61,10 +61,12 @@ def main(args): def parse_args(): import argparse - parser = argparse.ArgumentParser(description='PR label checker', - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('pr', type=int, help='PR number') + parser = argparse.ArgumentParser( + description="PR label checker", formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + + parser.add_argument("pr", type=int, help="PR number") args = parser.parse_args() return args diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 7316839..1d8d0ce 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -125,6 +125,6 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install requirements-parser==0.2.0 + pip install requirements-parser==0.2.0 toml - name: Run unittests run: python .github/validate_deps.py diff --git a/.github/workflows/style.yml b/.github/workflows/style.yml index 8f9def5..24dac75 100644 --- a/.github/workflows/style.yml +++ b/.github/workflows/style.yml @@ -74,7 +74,7 @@ jobs: - name: Run mypy run: | mypy --version - mypy --config-file mypy.ini torchscan/ + mypy torchscan/ pydocstyle-py3: runs-on: ${{ matrix.os }} @@ -91,7 +91,25 @@ jobs: architecture: x64 - name: Run pydocstyle run: | - pip install pydocstyle + pip install "pydocstyle[toml]" pydocstyle --version pydocstyle torchscan/ + black-py3: + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [ubuntu-latest] + python: [3.7] + steps: + - uses: actions/checkout@v2 + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python }} + architecture: x64 + - name: Run black + run: | + pip install black + black --version + black --check --diff . diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 1711c8a..949938b 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -87,19 +87,19 @@ make quality ##### Lint verification -To ensure that your incoming PR complies with the lint settings, you need to install [flake8](https://flake8.pycqa.org/en/latest/) and run the following command from the repository's root folder: +To ensure that your incoming PR complies with the lint settings, you need to install [flake8](https://flake8.pycqa.org/en/latest/), [black](https://black.readthedocs.io/en/stable/) and run the following command from the repository's root folder: ```shell flake8 ./ +black --check . ``` -This will read the `.flake8` setting file and let you know whether your commits need some adjustments. ##### Import order In order to ensure there is a common import order convention, run [isort](https://github.com/PyCQA/isort) as follows: ```shell -isort **/*.py +isort . ``` This will reorder the imports of your local files. @@ -108,9 +108,17 @@ This will reorder the imports of your local files. Additionally, to catch type-related issues and have a cleaner codebase, annotation typing are expected. 
After installing [mypy](https://github.com/python/mypy), you can run the verifications as follows: ```shell -mypy --config-file mypy.ini +mypy torchscan/ ``` -The `mypy.ini` file will be read to check your typing. + +##### Docstring style + +Finally, documentation being important, [pydocstyle](https://github.com/PyCQA/pydocstyle) will be checking the docstrings: + +```shell +pydocstyle torchscan/ +``` + ### Submit your modifications diff --git a/Makefile b/Makefile index a87e974..3e674d5 100644 --- a/Makefile +++ b/Makefile @@ -4,10 +4,12 @@ quality: flake8 ./ mypy torchscan/ pydocstyle torchscan/ + black --check . # this target runs checks on all files and potentially modifies some of them style: isort . + black . # Run tests for the library test: diff --git a/docs/source/conf.py b/docs/source/conf.py index d34b05a..07ab350 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -20,21 +20,21 @@ import sphinx_rtd_theme -sys.path.insert(0, os.path.abspath('../..')) +sys.path.insert(0, os.path.abspath("../..")) from datetime import datetime import torchscan # -- Project information ----------------------------------------------------- -master_doc = 'index' -project = 'torchscan' +master_doc = "index" +project = "torchscan" copyright = f"2020-{datetime.now().year}, François-Guillaume Fernandez" -author = 'François-Guillaume Fernandez' +author = "François-Guillaume Fernandez" # The full version, including alpha/beta/rc tags version = torchscan.__version__ -release = torchscan.__version__ + '-git' +release = torchscan.__version__ + "-git" # -- General configuration --------------------------------------------------- @@ -43,35 +43,35 @@ # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ - 'sphinx.ext.autodoc', - 'sphinx.ext.napoleon', - 'sphinx.ext.viewcode', - 'sphinx.ext.mathjax', - 'sphinxemoji.sphinxemoji', # cf. https://sphinxemojicodes.readthedocs.io/en/stable/ - 'sphinx_copybutton', + "sphinx.ext.autodoc", + "sphinx.ext.napoleon", + "sphinx.ext.viewcode", + "sphinx.ext.mathjax", + "sphinxemoji.sphinxemoji", # cf. https://sphinxemojicodes.readthedocs.io/en/stable/ + "sphinx_copybutton", ] napoleon_use_ivar = True # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ["_templates"] # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This pattern also affects html_static_path and html_extra_path. -exclude_patterns = [u'_build', 'Thumbs.db', '.DS_Store'] +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] # The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' -highlight_language = 'python3' +pygments_style = "sphinx" +highlight_language = "python3" # -- Options for HTML output ------------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # -html_theme = 'sphinx_rtd_theme' +html_theme = "sphinx_rtd_theme" html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] # Theme options are theme-specific and customize the look and feel of a theme @@ -79,18 +79,19 @@ # documentation. 
# html_theme_options = { - 'collapse_navigation': False, - 'display_version': False, - 'logo_only': False, - 'analytics_id': 'UA-148140560-3', + "collapse_navigation": False, + "display_version": False, + "logo_only": False, + "analytics_id": "UA-148140560-3", } # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] +html_static_path = ["_static"] + def setup(app): - app.add_css_file('css/custom.css') - app.add_js_file('js/custom.js') + app.add_css_file("css/custom.css") + app.add_js_file("js/custom.js") diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..05563ad --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,114 @@ +[build-system] +requires = ["setuptools", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "torchscan" +description = "Useful information about your Pytorch module" +authors = [ + {email = "fg-feedback@protonmail.com"}, + {name = "François-Guillaume Fernandez"} +] +readme = "README.md" +requires-python = ">=3.6,<4" +license = {file = "LICENSE"} +keywords = ["pytorch", "deep learning", "summary", "memory", "ram"] +classifiers = [ + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: Apache Software License", + "Natural Language :: English", + "Operating System :: OS Independent", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Topic :: Scientific/Engineering", + "Topic :: Scientific/Engineering :: Mathematics", + "Topic :: Scientific/Engineering :: Artificial Intelligence", +] +dynamic = ["version"] +dependencies = [ + "torch>=1.5.0", +] + +[project.optional-dependencies] +test = [ + "pytest>=5.3.2", + "coverage[toml]>=4.5.4", +] +quality = [ + "flake8>=3.9.0", + "isort>=5.7.0", + "mypy>=0.812", + "pydocstyle[toml]>=6.0.0", + "black>=22.1,<23.0", +] +docs = [ + "sphinx<=3.4.3", + "sphinx-rtd-theme==0.4.3", + "sphinxemoji>=0.1.8", + "sphinx-copybutton>=0.3.1", + "docutils<0.18", + # cf. 
https://github.com/readthedocs/readthedocs.org/issues/9038 + "Jinja2<3.1", +] +dev = [ + # test + "pytest>=5.3.2", + "coverage[toml]>=4.5.4", + # style + "flake8>=3.9.0", + "isort>=5.7.0", + "mypy>=0.812", + "pydocstyle[toml]>=6.0.0", + "black>=22.1,<23.0", + # docs + "sphinx<=3.4.3", + "sphinx-rtd-theme==0.4.3", + "sphinxemoji>=0.1.8", + "sphinx-copybutton>=0.3.1", + "docutils<0.18", + "Jinja2<3.1", +] + +[project.urls] +documentation = "https://frgfm.github.io/torch-scan" +repository = "https://github.com/frgfm/torch-scan" +tracker = "https://github.com/frgfm/torch-scan/issues" + +[tool.setuptools] +zip-safe = true + +[tool.setuptools.packages.find] +exclude = ["docs*", "scripts*", "tests*"] + + +[tool.mypy] +files = "torchscan/*.py" +show_error_codes = true +pretty = true +warn_unused_ignores = true +warn_redundant_casts = true +no_implicit_optional = true +disallow_untyped_calls = true +check_untyped_defs = true +implicit_reexport = false + +[tool.isort] +line_length = 120 +src_paths = ["torchscan", "tests"] +skip_glob = "**/__init__.py" +known_third_party = ["torch", "torchvision"] + +[tool.pydocstyle] +select = "D300,D301,D417" +match = ".*\\.py" + +[tool.coverage.run] +source = ["torchscan"] + +[tool.black] +line-length = 120 +target-version = ['py38'] diff --git a/scripts/benchmark.py b/scripts/benchmark.py index f6388d3..f633f7e 100644 --- a/scripts/benchmark.py +++ b/scripts/benchmark.py @@ -13,32 +13,55 @@ from torchscan import crawl_module TORCHVISION_MODELS = [ - 'alexnet', - 'googlenet', - 'vgg11', 'vgg11_bn', 'vgg13', 'vgg13_bn', 'vgg16', 'vgg16_bn', 'vgg19', 'vgg19_bn', - 'resnet18', 'resnet34', 'resnet50', 'resnet101', 'resnet152', - 'inception_v3', - 'squeezenet1_0', 'squeezenet1_1', - 'wide_resnet50_2', 'wide_resnet101_2', - 'densenet121', 'densenet161', 'densenet169', 'densenet201', - 'resnext50_32x4d', 'resnext101_32x8d', - 'mobilenet_v2', - 'shufflenet_v2_x0_5', 'shufflenet_v2_x1_0', 'shufflenet_v2_x1_5', 'shufflenet_v2_x2_0', - 'mnasnet0_5', 'mnasnet0_75', 'mnasnet1_0', 'mnasnet1_3' + "alexnet", + "googlenet", + "vgg11", + "vgg11_bn", + "vgg13", + "vgg13_bn", + "vgg16", + "vgg16_bn", + "vgg19", + "vgg19_bn", + "resnet18", + "resnet34", + "resnet50", + "resnet101", + "resnet152", + "inception_v3", + "squeezenet1_0", + "squeezenet1_1", + "wide_resnet50_2", + "wide_resnet101_2", + "densenet121", + "densenet161", + "densenet169", + "densenet201", + "resnext50_32x4d", + "resnext101_32x8d", + "mobilenet_v2", + "shufflenet_v2_x0_5", + "shufflenet_v2_x1_0", + "shufflenet_v2_x1_5", + "shufflenet_v2_x2_0", + "mnasnet0_5", + "mnasnet0_75", + "mnasnet1_0", + "mnasnet1_3", ] def main(): - device = 'cuda' if torch.cuda.is_available() else 'cpu' + device = "cuda" if torch.cuda.is_available() else "cpu" margin = 4 - headers = ['Model', 'Params (M)', 'FLOPs (G)', 'MACs (G)', 'DMAs (G)', 'RF'] + headers = ["Model", "Params (M)", "FLOPs (G)", "MACs (G)", "DMAs (G)", "RF"] max_w = [20, 10, 10, 10, 10, 10] - info_str = [(' ' * margin).join([f"{col_name:<{col_w}}" for col_name, col_w in zip(headers, max_w)])] - info_str.append('-' * len(info_str[0])) - print('\n'.join(info_str)) + info_str = [(" " * margin).join([f"{col_name:<{col_w}}" for col_name, col_w in zip(headers, max_w)])] + info_str.append("-" * len(info_str[0])) + print("\n".join(info_str)) for name in TORCHVISION_MODELS: model = models.__dict__[name]().eval().to(device) dsize = (3, 224, 224) @@ -46,13 +69,15 @@ def main(): dsize = (3, 299, 299) model_info = crawl_module(model, dsize) - tot_params = 
sum(layer['grad_params'] + layer['nograd_params'] for layer in model_info['layers']) - tot_flops = sum(layer['flops'] for layer in model_info['layers']) - tot_macs = sum(layer['macs'] for layer in model_info['layers']) - tot_dmas = sum(layer['dmas'] for layer in model_info['layers']) - rf = model_info['layers'][0]['rf'] - print(f"{name:<{max_w[0]}} | {tot_params / 1e6:<{max_w[1]}.2f} | {tot_flops / 1e9:<{max_w[2]}.2f} | " - f"{tot_macs / 1e9:<{max_w[3]}.2f} | {tot_dmas / 1e9:<{max_w[4]}.2f} | {rf:<{max_w[5]}.0f}") + tot_params = sum(layer["grad_params"] + layer["nograd_params"] for layer in model_info["layers"]) + tot_flops = sum(layer["flops"] for layer in model_info["layers"]) + tot_macs = sum(layer["macs"] for layer in model_info["layers"]) + tot_dmas = sum(layer["dmas"] for layer in model_info["layers"]) + rf = model_info["layers"][0]["rf"] + print( + f"{name:<{max_w[0]}} | {tot_params / 1e6:<{max_w[1]}.2f} | {tot_flops / 1e9:<{max_w[2]}.2f} | " + f"{tot_macs / 1e9:<{max_w[3]}.2f} | {tot_dmas / 1e9:<{max_w[4]}.2f} | {rf:<{max_w[5]}.0f}" + ) if __name__ == "__main__": diff --git a/scripts/collect_env.py b/scripts/collect_env.py index 16bab7c..5f994d8 100644 --- a/scripts/collect_env.py +++ b/scripts/collect_env.py @@ -20,12 +20,14 @@ try: import torchscan + TORCHSCAN_AVAILABLE = True except (ImportError, NameError, AttributeError, OSError): TORCHSCAN_AVAILABLE = False try: import torch + TORCH_AVAILABLE = True except (ImportError, NameError, AttributeError, OSError): TORCH_AVAILABLE = False @@ -34,23 +36,25 @@ # System Environment Information -SystemEnv = namedtuple('SystemEnv', [ - 'torchscan_version', - 'torch_version', - 'os', - 'python_version', - 'is_cuda_available', - 'cuda_runtime_version', - 'nvidia_driver_version', - 'nvidia_gpu_models', - 'cudnn_version', -]) +SystemEnv = namedtuple( + "SystemEnv", + [ + "torchscan_version", + "torch_version", + "os", + "python_version", + "is_cuda_available", + "cuda_runtime_version", + "nvidia_driver_version", + "nvidia_gpu_models", + "cudnn_version", + ], +) def run(command): """Returns (return-code, stdout, stderr)""" - p = subprocess.Popen(command, stdout=subprocess.PIPE, - stderr=subprocess.PIPE, shell=True) + p = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) output, err = p.communicate() rc = p.returncode if PY3: @@ -80,53 +84,52 @@ def run_and_parse_first_match(run_lambda, command, regex): def get_nvidia_driver_version(run_lambda): - if get_platform() == 'darwin': - cmd = 'kextstat | grep -i cuda' - return run_and_parse_first_match(run_lambda, cmd, - r'com[.]nvidia[.]CUDA [(](.*?)[)]') + if get_platform() == "darwin": + cmd = "kextstat | grep -i cuda" + return run_and_parse_first_match(run_lambda, cmd, r"com[.]nvidia[.]CUDA [(](.*?)[)]") smi = get_nvidia_smi() - return run_and_parse_first_match(run_lambda, smi, r'Driver Version: (.*?) ') + return run_and_parse_first_match(run_lambda, smi, r"Driver Version: (.*?) 
") def get_gpu_info(run_lambda): - if get_platform() == 'darwin': + if get_platform() == "darwin": if TORCH_AVAILABLE and torch.cuda.is_available(): return torch.cuda.get_device_name(None) return None smi = get_nvidia_smi() - uuid_regex = re.compile(r' \(UUID: .+?\)') - rc, out, _ = run_lambda(smi + ' -L') + uuid_regex = re.compile(r" \(UUID: .+?\)") + rc, out, _ = run_lambda(smi + " -L") if rc != 0: return None # Anonymize GPUs by removing their UUID - return re.sub(uuid_regex, '', out) + return re.sub(uuid_regex, "", out) def get_running_cuda_version(run_lambda): - return run_and_parse_first_match(run_lambda, 'nvcc --version', r'release .+ V(.*)') + return run_and_parse_first_match(run_lambda, "nvcc --version", r"release .+ V(.*)") def get_cudnn_version(run_lambda): """This will return a list of libcudnn.so; it's hard to tell which one is being used""" - if get_platform() == 'win32': + if get_platform() == "win32": cudnn_cmd = 'where /R "%CUDA_PATH%\\bin" cudnn*.dll' - elif get_platform() == 'darwin': + elif get_platform() == "darwin": # CUDA libraries and drivers can be found in /usr/local/cuda/. See # https://docs.nvidia.com/cuda/cuda-installation-guide-mac-os-x/index.html#install # https://docs.nvidia.com/deeplearning/sdk/cudnn-install/index.html#installmac # Use CUDNN_LIBRARY when cudnn library is installed elsewhere. - cudnn_cmd = 'ls /usr/local/cuda/lib/libcudnn*' + cudnn_cmd = "ls /usr/local/cuda/lib/libcudnn*" else: cudnn_cmd = 'ldconfig -p | grep libcudnn | rev | cut -d" " -f1 | rev' rc, out, _ = run_lambda(cudnn_cmd) # find will return 1 if there are permission errors or if not found if len(out) == 0 or rc not in (1, 0): - lib = os.environ.get('CUDNN_LIBRARY') + lib = os.environ.get("CUDNN_LIBRARY") if lib is not None and os.path.isfile(lib): return os.path.realpath(lib) return None files = set() - for fn in out.split('\n'): + for fn in out.split("\n"): fn = os.path.realpath(fn) # eliminate symbolic links if os.path.isfile(fn): files.add(fn) @@ -136,61 +139,68 @@ def get_cudnn_version(run_lambda): files = list(sorted(files)) if len(files) == 1: return files[0] - result = '\n'.join(files) - return 'Probably one of the following:\n{}'.format(result) + result = "\n".join(files) + return "Probably one of the following:\n{}".format(result) def get_nvidia_smi(): # Note: nvidia-smi is currently available only on Windows and Linux - smi = 'nvidia-smi' - if get_platform() == 'win32': - smi = '"C:\\Program Files\\NVIDIA Corporation\\NVSMI\\%s"' % smi + smi = "nvidia-smi" + if get_platform() == "win32": + system_root = os.environ.get("SYSTEMROOT", "C:\\Windows") + program_files_root = os.environ.get("PROGRAMFILES", "C:\\Program Files") + legacy_path = os.path.join(program_files_root, "NVIDIA Corporation", "NVSMI", smi) + new_path = os.path.join(system_root, "System32", smi) + smis = [new_path, legacy_path] + for candidate_smi in smis: + if os.path.exists(candidate_smi): + smi = '"{}"'.format(candidate_smi) + break return smi def get_platform(): - if sys.platform.startswith('linux'): - return 'linux' - elif sys.platform.startswith('win32'): - return 'win32' - elif sys.platform.startswith('cygwin'): - return 'cygwin' - elif sys.platform.startswith('darwin'): - return 'darwin' + if sys.platform.startswith("linux"): + return "linux" + elif sys.platform.startswith("win32"): + return "win32" + elif sys.platform.startswith("cygwin"): + return "cygwin" + elif sys.platform.startswith("darwin"): + return "darwin" else: return sys.platform def get_mac_version(run_lambda): - return 
run_and_parse_first_match(run_lambda, 'sw_vers -productVersion', r'(.*)') + return run_and_parse_first_match(run_lambda, "sw_vers -productVersion", r"(.*)") def get_windows_version(run_lambda): - return run_and_read_all(run_lambda, 'wmic os get Caption | findstr /v Caption') + return run_and_read_all(run_lambda, "wmic os get Caption | findstr /v Caption") def get_lsb_version(run_lambda): - return run_and_parse_first_match(run_lambda, 'lsb_release -a', r'Description:\t(.*)') + return run_and_parse_first_match(run_lambda, "lsb_release -a", r"Description:\t(.*)") def check_release_file(run_lambda): - return run_and_parse_first_match(run_lambda, 'cat /etc/*-release', - r'PRETTY_NAME="(.*)"') + return run_and_parse_first_match(run_lambda, "cat /etc/*-release", r'PRETTY_NAME="(.*)"') def get_os(run_lambda): platform = get_platform() - if platform in ('win32', 'cygwin'): + if platform in ("win32", "cygwin"): return get_windows_version(run_lambda) - if platform == 'darwin': + if platform == "darwin": version = get_mac_version(run_lambda) if version is None: return None - return 'Mac OSX {}'.format(version) + return "Mac OSX {}".format(version) - if platform == 'linux': + if platform == "linux": # Ubuntu/Debian based desc = get_lsb_version(run_lambda) if desc is not None: @@ -213,13 +223,13 @@ def get_env_info(): if TORCHSCAN_AVAILABLE: torchscan_str = torchscan.__version__ else: - torchscan_str = 'N/A' + torchscan_str = "N/A" if TORCH_AVAILABLE: torch_str = torch.__version__ cuda_available_str = torch.cuda.is_available() else: - torch_str = cuda_available_str = 'N/A' + torch_str = cuda_available_str = "N/A" return SystemEnv( torchscan_version=torchscan_str, @@ -250,14 +260,14 @@ def get_env_info(): def pretty_str(envinfo): - def replace_nones(dct, replacement='Could not collect'): + def replace_nones(dct, replacement="Could not collect"): for key in dct.keys(): if dct[key] is not None: continue dct[key] = replacement return dct - def replace_bools(dct, true='Yes', false='No'): + def replace_bools(dct, true="Yes", false="No"): for key in dct.keys(): if dct[key] is True: dct[key] = true @@ -267,28 +277,26 @@ def replace_bools(dct, true='Yes', false='No'): def maybe_start_on_next_line(string): # If `string` is multiline, prepend a \n to it. 
- if string is not None and len(string.split('\n')) > 1: - return '\n{}\n'.format(string) + if string is not None and len(string.split("\n")) > 1: + return "\n{}\n".format(string) return string mutable_dict = envinfo._asdict() # If nvidia_gpu_models is multiline, start on the next line - mutable_dict['nvidia_gpu_models'] = \ - maybe_start_on_next_line(envinfo.nvidia_gpu_models) + mutable_dict["nvidia_gpu_models"] = maybe_start_on_next_line(envinfo.nvidia_gpu_models) # If the machine doesn't have CUDA, report some fields as 'No CUDA' dynamic_cuda_fields = [ - 'cuda_runtime_version', - 'nvidia_gpu_models', - 'nvidia_driver_version', + "cuda_runtime_version", + "nvidia_gpu_models", + "nvidia_driver_version", ] - all_cuda_fields = dynamic_cuda_fields + ['cudnn_version'] - all_dynamic_cuda_fields_missing = all( - mutable_dict[field] is None for field in dynamic_cuda_fields) + all_cuda_fields = dynamic_cuda_fields + ["cudnn_version"] + all_dynamic_cuda_fields_missing = all(mutable_dict[field] is None for field in dynamic_cuda_fields) if TORCH_AVAILABLE and not torch.cuda.is_available() and all_dynamic_cuda_fields_missing: for field in all_cuda_fields: - mutable_dict[field] = 'No CUDA' + mutable_dict[field] = "No CUDA" # Replace True with Yes, False with No mutable_dict = replace_bools(mutable_dict) @@ -314,5 +322,5 @@ def main(): print(output) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index a4c8e04..0000000 --- a/setup.cfg +++ /dev/null @@ -1,90 +0,0 @@ -[metadata] -author = François-Guillaume Fernandez -author_email = fg-feedback@protonmail.com -description = Useful information about your Pytorch module -long_description = file: README.md -long_description_content_type = text/markdown; charset=UTF-8 -url = https://github.com/frgfm/torch-scan -download_url = https://github.com/frgfm/torch-scan/tags -project_urls = - Documentation = https://frgfm.github.io/torch-scan - Source = https://github.com/frgfm/torch-scan - Tracker = https://github.com/frgfm/torch-scan/issues -license = Apache -license_file = LICENSE -keywords = pytorch, deep learning, summary, memory, ram -classifiers = - Development Status :: 4 - Beta - Intended Audience :: Developers - Intended Audience :: Science/Research - License :: OSI Approved :: Apache Software License - Natural Language :: English - Operating System :: OS Independent - Programming Language :: Python :: 3 - Programming Language :: Python :: 3.6 - Programming Language :: Python :: 3.7 - Programming Language :: Python :: 3.8 - Topic :: Scientific/Engineering - Topic :: Scientific/Engineering :: Mathematics - Topic :: Scientific/Engineering :: Artificial Intelligence - -[options] -zip_safe = True -packages = find: -include_package_data = True -python_requires = >=3.6.0,<4 -install_requires = - torch>=1.5.0 - -[options.package_data] -* = LICENSE - -[options.packages.find] -exclude = - tests* - -[options.extras_require] -test = - pytest>=5.3.2 - coverage>=4.5.4 -quality = - flake8>=3.9.0 - isort>=5.7.0 - mypy>=0.812 - pydocstyle>=6.0.0 -docs = - sphinx<=3.4.3 - sphinx-rtd-theme==0.4.3 - sphinxemoji>=0.1.8 - sphinx-copybutton>=0.3.1 - docutils<0.18 - # cf. 
https://github.com/readthedocs/readthedocs.org/issues/9038 - Jinja2<3.1 -dev = - %(test)s - %(quality)s - %(docs)s - -[flake8] -max-line-length = 120 -ignore = E402, E265, F403, W503, W504, E731 -exclude = .github, .git, venv*, docs, build -per-file-ignores = **/__init__.py:F401 - -[mypy] -files = torchscan/*.py -show_error_codes = True -pretty = True - -[isort] -line_length = 120 -src_paths = torchscan, tests -skip_glob = **/__init__.py -known_third_party = torch, torchvision - -[pydocstyle] -select = D300,D301,D417 -match = .*\.py - -[coverage:run] -source = torchscan diff --git a/setup.py b/setup.py index dffccbe..ac8f2fa 100644 --- a/setup.py +++ b/setup.py @@ -19,7 +19,7 @@ # Dynamically set the __version__ attribute cwd = Path(__file__).parent.absolute() - with open(cwd.joinpath('torchscan', 'version.py'), 'w', encoding='utf-8') as f: + with open(cwd.joinpath("torchscan", "version.py"), "w", encoding="utf-8") as f: f.write(f"__version__ = '{VERSION}'\n") setup(name=PKG_NAME, version=VERSION) diff --git a/tests/requirements.txt b/tests/requirements.txt index f067cbf..4404805 100644 --- a/tests/requirements.txt +++ b/tests/requirements.txt @@ -1,2 +1,2 @@ pytest>=5.3.2 -coverage>=4.5.4 +coverage[toml]>=4.5.4 diff --git a/tests/test_crawler.py b/tests/test_crawler.py index 8194f5e..309f160 100644 --- a/tests/test_crawler.py +++ b/tests/test_crawler.py @@ -12,13 +12,13 @@ def test_apply(): # Tag module attributes def tag_name(mod, name): - mod.__depth__ = len(name.split('.')) - 1 - mod.__name__ = name.rpartition('.')[-1] + mod.__depth__ = len(name.split(".")) - 1 + mod.__name__ = name.rpartition(".")[-1] crawler.apply(mod, tag_name) assert mod[1][1].__depth__ == 2 - assert mod[1][1].__name__ == '1' + assert mod[1][1].__name__ == "1" def test_crawl_module(): @@ -27,8 +27,8 @@ def test_crawl_module(): res = crawler.crawl_module(mod, (3, 32, 32)) assert isinstance(res, dict) - assert res['overall']['grad_params'] == 224 - assert res['layers'][0]['output_shape'] == (-1, 8, 30, 30) + assert res["overall"]["grad_params"] == 224 + assert res["layers"][0]["output_shape"] == (-1, 8, 30, 30) def test_summary(): @@ -41,7 +41,7 @@ def test_summary(): crawler.summary(mod, (3, 32, 32)) # Reset redirect. sys.stdout = sys.__stdout__ - assert captured_output.getvalue().split('\n')[7] == 'Total params: 224' + assert captured_output.getvalue().split("\n")[7] == "Total params: 224" # Check receptive field captured_output = io.StringIO() @@ -49,13 +49,13 @@ def test_summary(): crawler.summary(mod, (3, 32, 32), receptive_field=True) # Reset redirect. sys.stdout = sys.__stdout__ - assert captured_output.getvalue().split('\n')[1].rpartition(' ')[-1] == 'Receptive field' - assert captured_output.getvalue().split('\n')[3].split()[-1] == '3' + assert captured_output.getvalue().split("\n")[1].rpartition(" ")[-1] == "Receptive field" + assert captured_output.getvalue().split("\n")[3].split()[-1] == "3" # Check effective stats captured_output = io.StringIO() sys.stdout = captured_output crawler.summary(mod, (3, 32, 32), receptive_field=True, effective_rf_stats=True) # Reset redirect. 
sys.stdout = sys.__stdout__ - assert captured_output.getvalue().split('\n')[1].rpartition(' ')[-1] == 'Effective padding' - assert captured_output.getvalue().split('\n')[3].split()[-1] == '0' + assert captured_output.getvalue().split("\n")[1].rpartition(" ")[-1] == "Effective padding" + assert captured_output.getvalue().split("\n")[3].split()[-1] == "0" diff --git a/tests/test_utils.py b/tests/test_utils.py index e29a63a..03d263a 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -4,7 +4,7 @@ def test_format_name(): - name = 'mymodule' + name = "mymodule" assert utils.format_name(name) == name assert utils.format_name(name, depth=1) == f"├─{name}" assert utils.format_name(name, depth=3) == f"| | └─{name}" @@ -12,15 +12,15 @@ def test_format_name(): def test_wrap_string(): - example = '.'.join(['a' for _ in range(10)]) + example = ".".join(["a" for _ in range(10)]) max_len = 10 - wrap = '[...]' + wrap = "[...]" - assert utils.wrap_string(example, max_len, mode='end') == example[:max_len - len(wrap)] + wrap - assert utils.wrap_string(example, max_len, mode='mid') == f"{example[:max_len - 2 - len(wrap)]}{wrap}.a" - assert utils.wrap_string(example, len(example), mode='end') == example + assert utils.wrap_string(example, max_len, mode="end") == example[: max_len - len(wrap)] + wrap + assert utils.wrap_string(example, max_len, mode="mid") == f"{example[:max_len - 2 - len(wrap)]}{wrap}.a" + assert utils.wrap_string(example, len(example), mode="end") == example with pytest.raises(ValueError): - _ = utils.wrap_string(example, max_len, mode='test') + _ = utils.wrap_string(example, max_len, mode="test") @pytest.mark.parametrize( diff --git a/torchscan/crawler.py b/torchscan/crawler.py index 876bfe2..8e853f2 100644 --- a/torchscan/crawler.py +++ b/torchscan/crawler.py @@ -13,7 +13,7 @@ from .process import get_process_gpu_ram from .utils import aggregate_info, format_info -__all__ = ['crawl_module', 'summary'] +__all__ = ["crawl_module", "summary"] def apply(module: Module, fn: Callable[[Module, str], None], name: Optional[str] = None) -> None: @@ -35,7 +35,7 @@ def apply(module: Module, fn: Callable[[Module, str], None], name: Optional[str] def crawl_module( module: Module, input_shape: Union[List[Tuple[int, ...]], Tuple[int, ...]], - dtype: Optional[Union[torch.dtype, Iterable[torch.dtype]]] = None + dtype: Optional[Union[torch.dtype, Iterable[torch.dtype]]] = None, ) -> Dict[str, Any]: """Retrieves module information for an expected input tensor shape @@ -57,12 +57,12 @@ def crawl_module( p = next(module.parameters()) device = p.device - cuda_overhead, framework_overhead = 0., 0. 
+ cuda_overhead, framework_overhead = 0.0, 0.0 if torch.cuda.is_available(): - # Process RAM - allocator RAM - cuda_overhead = get_process_gpu_ram(os.getpid()) - (torch.cuda.memory_reserved() / 1024 ** 2) + # Process RAM - allocator RAM + cuda_overhead = get_process_gpu_ram(os.getpid()) - (torch.cuda.memory_reserved() / 1024**2) # Allocator RAM - Used RAM - framework_overhead = (torch.cuda.memory_reserved() - torch.cuda.memory_allocated()) / 1024 ** 2 + framework_overhead = (torch.cuda.memory_reserved() - torch.cuda.memory_allocated()) / 1024**2 # input if not isinstance(input_shape, list): @@ -72,8 +72,9 @@ def crawl_module( if isinstance(dtype, torch.dtype): dtype = [dtype] * len(input_shape) # Tensor arguments - input_ts = [torch.rand(1, *in_shape).to(dtype=_dtype, device=device) - for in_shape, _dtype in zip(input_shape, dtype)] + input_ts = [ + torch.rand(1, *in_shape).to(dtype=_dtype, device=device) for in_shape, _dtype in zip(input_shape, dtype) + ] pre_fw_handles, post_fw_handles = [], [] pre_hook_tracker: Dict[int, Any] = {} @@ -81,19 +82,19 @@ def crawl_module( # Hook definition def _hook_info(module: Module, name: str) -> None: - def _pre_hook(module: Module, input: torch.Tensor) -> None: """Pre-forward hook""" - # Check that another hook has not been triggered at this forward stage - if not pre_hook_tracker[id(module)]['is_used'] and \ - (pre_hook_tracker[id(module)]['target'] == pre_hook_tracker[id(module)]['current']): + # Check that another hook has not been triggered at this forward stage + if not pre_hook_tracker[id(module)]["is_used"] and ( + pre_hook_tracker[id(module)]["target"] == pre_hook_tracker[id(module)]["current"] + ): # Add information # Params grad_params, nograd_params, param_size = 0, 0, 0 num_buffers, buffer_size = 0, 0 is_shared = False if not any(module.children()): - # Parameters + # Parameters for p in module.parameters(): if id(p) not in param_ids: if p.requires_grad: @@ -118,40 +119,45 @@ def _pre_hook(module: Module, input: torch.Tensor) -> None: else: call_idxs[id(module)].append(len(info)) - info.append(dict(name=name.rpartition('.')[-1], - depth=len(name.split('.')) - 1, - type=module.__class__.__name__, - input_shape=(-1, *input[0][0].shape[1:]), - output_shape=None, - grad_params=grad_params, - nograd_params=nograd_params, - param_size=param_size, - num_buffers=num_buffers, - buffer_size=buffer_size, - flops=0, - macs=0, - dmas=0, - rf=1, - s=1, - p=0, - is_shared=is_shared, - is_leaf=not any(module.children()))) + info.append( + dict( + name=name.rpartition(".")[-1], + depth=len(name.split(".")) - 1, + type=module.__class__.__name__, + input_shape=(-1, *input[0][0].shape[1:]), + output_shape=None, + grad_params=grad_params, + nograd_params=nograd_params, + param_size=param_size, + num_buffers=num_buffers, + buffer_size=buffer_size, + flops=0, + macs=0, + dmas=0, + rf=1, + s=1, + p=0, + is_shared=is_shared, + is_leaf=not any(module.children()), + ) + ) # Mark the next hook for execution - pre_hook_tracker[id(module)]['target'] += 1 + pre_hook_tracker[id(module)]["target"] += 1 # Current pass already used one of the hooks - pre_hook_tracker[id(module)]['is_used'] = True - pre_hook_tracker[id(module)]['current'] += 1 + pre_hook_tracker[id(module)]["is_used"] = True + pre_hook_tracker[id(module)]["current"] += 1 # All the hooks have been checked, reset the temporary values - if pre_hook_tracker[id(module)]['current'] == len(module._forward_pre_hooks): - pre_hook_tracker[id(module)]['current'] = 0 - pre_hook_tracker[id(module)]['is_used'] = False + 
if pre_hook_tracker[id(module)]["current"] == len(module._forward_pre_hooks): + pre_hook_tracker[id(module)]["current"] = 0 + pre_hook_tracker[id(module)]["is_used"] = False def _fwd_hook(module: Module, inputs: Tuple[torch.Tensor, ...], output: torch.Tensor) -> None: """Post-forward hook""" - # Check that another hook has not been triggered at this forward stage - if not post_hook_tracker[id(module)]['is_used'] and \ - (post_hook_tracker[id(module)]['target'] == post_hook_tracker[id(module)]['current']): + # Check that another hook has not been triggered at this forward stage + if not post_hook_tracker[id(module)]["is_used"] and ( + post_hook_tracker[id(module)]["target"] == post_hook_tracker[id(module)]["current"] + ): # Write information # Retrieve forward index if len(call_idxs[id(module)]) == 1: @@ -159,13 +165,13 @@ def _fwd_hook(module: Module, inputs: Tuple[torch.Tensor, ...], output: torch.Te else: # The first dictionary with output_shape=None is the correct one for _idx in call_idxs[id(module)]: - if info[_idx]['output_shape'] is None: + if info[_idx]["output_shape"] is None: fw_idx = _idx break if any(module.children()): tot_flops, tot_macs, tot_dmas = 0, 0, 0 - current_rf, current_stride, current_padding = 1., 1., 0. + current_rf, current_stride, current_padding = 1.0, 1.0, 0.0 else: # Compute stats for standalone layers tot_flops = module_flops(module, inputs, output) @@ -174,25 +180,25 @@ def _fwd_hook(module: Module, inputs: Tuple[torch.Tensor, ...], output: torch.Te current_rf, current_stride, current_padding = module_rf(module, inputs[0], output) # Update layer information - info[fw_idx]['output_shape'] = (-1, *output.shape[1:]) - # Add them, since some modules can be used several times - info[fw_idx]['flops'] = tot_flops - info[fw_idx]['macs'] = tot_macs - info[fw_idx]['dmas'] = tot_dmas + info[fw_idx]["output_shape"] = (-1, *output.shape[1:]) + # Add them, since some modules can be used several times + info[fw_idx]["flops"] = tot_flops + info[fw_idx]["macs"] = tot_macs + info[fw_idx]["dmas"] = tot_dmas # Compute receptive field - info[fw_idx]['rf'] = current_rf - info[fw_idx]['s'] = current_stride - info[fw_idx]['p'] = current_padding + info[fw_idx]["rf"] = current_rf + info[fw_idx]["s"] = current_stride + info[fw_idx]["p"] = current_padding # Mark the next hook for execution - post_hook_tracker[id(module)]['target'] += 1 + post_hook_tracker[id(module)]["target"] += 1 # Current pass already used one of the hooks - post_hook_tracker[id(module)]['is_used'] = True - post_hook_tracker[id(module)]['current'] += 1 + post_hook_tracker[id(module)]["is_used"] = True + post_hook_tracker[id(module)]["current"] += 1 # All the hooks have been checked, reset the temporary values - if post_hook_tracker[id(module)]['current'] == len(module._forward_pre_hooks): - post_hook_tracker[id(module)]['current'] = 0 - post_hook_tracker[id(module)]['is_used'] = False + if post_hook_tracker[id(module)]["current"] == len(module._forward_pre_hooks): + post_hook_tracker[id(module)]["current"] = 0 + post_hook_tracker[id(module)]["is_used"] = False pre_fw_handles.append(module.register_forward_pre_hook(_pre_hook)) post_fw_handles.append(module.register_forward_hook(_fwd_hook)) @@ -216,10 +222,10 @@ def _fwd_hook(module: Module, inputs: Tuple[torch.Tensor, ...], output: torch.Te for handle in post_fw_handles: handle.remove() - reserved_ram, diff_ram = 0., 0. 
+ reserved_ram, diff_ram = 0.0, 0.0 if torch.cuda.is_available(): - reserved_ram = torch.cuda.memory_reserved() / 1024 ** 2 - diff_ram = (torch.cuda.memory_reserved() - torch.cuda.memory_allocated()) / 1024 ** 2 + reserved_ram = torch.cuda.memory_reserved() / 1024**2 + diff_ram = (torch.cuda.memory_reserved() - torch.cuda.memory_allocated()) / 1024**2 torch.cuda.synchronize() torch.cuda.empty_cache() @@ -235,27 +241,36 @@ def _fwd_hook(module: Module, inputs: Tuple[torch.Tensor, ...], output: torch.Te num_buffers += b.numel() buffer_size += b.numel() * b.element_size() - # Update cumulative receptive field + # Update cumulative receptive field _rf, _s, _p = 1, 1, 0 for fw_idx, _layer in enumerate(info): - _rf += _s * (_layer['rf'] - 1) - _p += _s * _layer['p'] - _s *= _layer['s'] - info[fw_idx]['rf'] = _rf - info[fw_idx]['s'] = _s - info[fw_idx]['p'] = _p - - return dict(overheads=dict(cuda=dict(pre=cuda_overhead, fwd=get_process_gpu_ram(os.getpid()) - reserved_ram), - framework=dict(pre=framework_overhead, fwd=diff_ram)), - layers=info, - overall=dict(grad_params=grad_params, nograd_params=nograd_params, param_size=param_size, - num_buffers=num_buffers, buffer_size=buffer_size)) + _rf += _s * (_layer["rf"] - 1) + _p += _s * _layer["p"] + _s *= _layer["s"] + info[fw_idx]["rf"] = _rf + info[fw_idx]["s"] = _s + info[fw_idx]["p"] = _p + + return dict( + overheads=dict( + cuda=dict(pre=cuda_overhead, fwd=get_process_gpu_ram(os.getpid()) - reserved_ram), + framework=dict(pre=framework_overhead, fwd=diff_ram), + ), + layers=info, + overall=dict( + grad_params=grad_params, + nograd_params=nograd_params, + param_size=param_size, + num_buffers=num_buffers, + buffer_size=buffer_size, + ), + ) def summary( module: Module, input_shape: Tuple[int, ...], - wrap_mode: str = 'mid', + wrap_mode: str = "mid", max_depth: Optional[int] = None, receptive_field: bool = False, effective_rf_stats: bool = False, diff --git a/torchscan/modules/flops.py b/torchscan/modules/flops.py index b6782a0..18d2a35 100644 --- a/torchscan/modules/flops.py +++ b/torchscan/modules/flops.py @@ -15,7 +15,7 @@ from torch.nn.modules.conv import _ConvNd, _ConvTransposeNd from torch.nn.modules.pooling import _AdaptiveAvgPoolNd, _AdaptiveMaxPoolNd, _AvgPoolNd, _MaxPoolNd -__all__ = ['module_flops'] +__all__ = ["module_flops"] def module_flops(module: Module, inputs: Tuple[Tensor, ...], output: Tensor) -> int: @@ -64,7 +64,7 @@ def module_flops(module: Module, inputs: Tuple[Tensor, ...], output: Tensor) -> elif isinstance(module, nn.Transformer): return flops_transformer(module, inputs) else: - warnings.warn(f'Module type not supported: {module.__class__.__name__}') + warnings.warn(f"Module type not supported: {module.__class__.__name__}") return 0 @@ -150,7 +150,7 @@ def flops_convnd(module: _ConvNd, inputs: Tuple[Tensor, ...], output: Tensor) -> # For each position, # mult = kernel size, # adds = kernel size - 1 window_flops_per_chan = 2 * reduce(mul, module.kernel_size) - 1 # Connections to input channels is controlled by the group parameter - effective_in_chan = (inputs[0].shape[1] // module.groups) + effective_in_chan = inputs[0].shape[1] // module.groups # N * flops + (N - 1) additions window_flops = effective_in_chan * window_flops_per_chan + (effective_in_chan - 1) conv_flops = output.numel() * window_flops @@ -166,13 +166,13 @@ def flops_bn(module: _BatchNorm, inputs: Tuple[Tensor, ...]) -> int: # for each channel, add eps and running_var, sqrt it norm_ops = module.num_features * 2 - # For each element, sub running_mean, div by 
denom + # For each element, sub running_mean, div by denom norm_ops += inputs[0].numel() * 2 # For each element, mul by gamma, add beta scale_ops = inputs[0].numel() * 2 if module.affine else 0 bn_flops = norm_ops + scale_ops - # Count tracking stats update ops + # Count tracking stats update ops # cf. https://github.com/pytorch/pytorch/blob/master/torch/nn/modules/batchnorm.py#L94-L101 tracking_flops = 0 if module.track_running_stats and module.training: @@ -180,9 +180,9 @@ def flops_bn(module: _BatchNorm, inputs: Tuple[Tensor, ...]) -> int: if module.momentum is None: tracking_flops += 1 # running_mean: by channel, sum values and div by batch size - tracking_flops += inputs[0].numel() # type: ignore[attr-defined] + tracking_flops += inputs[0].numel() # running_var: by channel, sub mean and square values, sum them, divide by batch size - tracking_flops += 3 * inputs[0].numel() # type: ignore[attr-defined] + tracking_flops += 3 * inputs[0].numel() # Update both runnning stat: rescale previous value (mul by N), add it the new one, then div by (N + 1) tracking_flops += 2 * module.num_features * 3 @@ -204,19 +204,17 @@ def flops_avgpool(module: _AvgPoolNd, inputs: Tuple[Tensor, ...], output: Tensor k_size = reduce(mul, module.kernel_size) if isinstance(module.kernel_size, tuple) else module.kernel_size # for each spatial output element, sum elements in kernel scope and div by kernel size - return output.numel() * (k_size - 1 + inputs[0].ndim - 2) # type: ignore[attr-defined] + return output.numel() * (k_size - 1 + inputs[0].ndim - 2) def flops_adaptive_maxpool(module: _AdaptiveMaxPoolNd, inputs: Tuple[Tensor, ...], output: Tensor) -> int: """FLOPs estimation for `torch.nn.modules.pooling._AdaptiveMaxPoolNd`""" - if isinstance(module.output_size, tuple): - o_sizes = module.output_size - else: - o_sizes = (module.output_size,) * (inputs[0].ndim - 2) # type: ignore[attr-defined] - # Approximate kernel_size using ratio of spatial shapes between input and output - kernel_size = tuple(i_size // o_size if (i_size % o_size) == 0 else i_size - o_size * (i_size // o_size) + 1 - for i_size, o_size in zip(inputs[0].shape[2:], o_sizes)) + # Approximate kernel_size using ratio of spatial shapes between input and output + kernel_size = tuple( + i_size // o_size if (i_size % o_size) == 0 else i_size - o_size * (i_size // o_size) + 1 + for i_size, o_size in zip(inputs[0].shape[2:], output.shape[2:]) + ) # for each spatial output element, check max element in kernel scope return output.numel() * (reduce(mul, kernel_size) - 1) @@ -225,13 +223,11 @@ def flops_adaptive_maxpool(module: _AdaptiveMaxPoolNd, inputs: Tuple[Tensor, ... 
def flops_adaptive_avgpool(module: _AdaptiveAvgPoolNd, inputs: Tuple[Tensor, ...], output: Tensor) -> int: """FLOPs estimation for `torch.nn.modules.pooling._AdaptiveAvgPoolNd`""" - if isinstance(module.output_size, tuple): - o_sizes = module.output_size - else: - o_sizes = (module.output_size,) * (inputs[0].ndim - 2) # type: ignore[attr-defined] - # Approximate kernel_size using ratio of spatial shapes between input and output - kernel_size = tuple(i_size // o_size if (i_size % o_size) == 0 else i_size - o_size * (i_size // o_size) + 1 - for i_size, o_size in zip(inputs[0].shape[2:], o_sizes)) + # Approximate kernel_size using ratio of spatial shapes between input and output + kernel_size = tuple( + i_size // o_size if (i_size % o_size) == 0 else i_size - o_size * (i_size // o_size) + 1 + for i_size, o_size in zip(inputs[0].shape[2:], output.shape[2:]) + ) # for each spatial output element, sum elements in kernel scope and div by kernel size return output.numel() * (reduce(mul, kernel_size) - 1 + len(kernel_size)) @@ -241,7 +237,7 @@ def flops_layernorm(module: nn.LayerNorm, inputs: Tuple[Tensor, ...]) -> int: """FLOPs estimation for `torch.nn.modules.batchnorm._BatchNorm`""" # Compute current mean - norm_ops = reduce(mul, module.normalized_shape) * inputs[0].shape[:-len(module.normalized_shape)].numel() + norm_ops = reduce(mul, module.normalized_shape) * inputs[0].shape[: -len(module.normalized_shape)].numel() # current var (sub the mean, square it, sum them, divide by remaining shape) norm_ops += 3 * inputs[0].numel() # for each channel, add eps and running_var, sqrt it @@ -263,36 +259,24 @@ def flops_mha(module: nn.MultiheadAttention, inputs: Tuple[Tensor, ...]) -> int: if module._qkv_same_embed_dim: tot_flops = 3 * flops_linear( nn.Linear( - module.in_proj_weight.shape[1], - module.in_proj_weight.shape[0], - bias=module.in_proj_bias is not None + module.in_proj_weight.shape[1], module.in_proj_weight.shape[0], bias=module.in_proj_bias is not None ), - (torch.empty((batch_size, module.in_proj_weight.shape[1])),) + (torch.empty((batch_size, module.in_proj_weight.shape[1])),), ) else: tot_flops = flops_linear( nn.Linear( - module.q_proj_weight.shape[1], - module.q_proj_weight.shape[0], - bias=module.in_proj_bias is not None + module.q_proj_weight.shape[1], module.q_proj_weight.shape[0], bias=module.in_proj_bias is not None ), - (torch.empty((batch_size, module.q_proj_weight.shape[1])),) + (torch.empty((batch_size, module.q_proj_weight.shape[1])),), ) tot_flops += flops_linear( - nn.Linear( - module.k_proj_weight.shape[1], - module.k_proj_weight.shape[0], - bias=module.bias_k is not None - ), - (torch.empty((batch_size, module.k_proj_weight.shape[1])),) + nn.Linear(module.k_proj_weight.shape[1], module.k_proj_weight.shape[0], bias=module.bias_k is not None), + (torch.empty((batch_size, module.k_proj_weight.shape[1])),), ) tot_flops += flops_linear( - nn.Linear( - module.v_proj_weight.shape[1], - module.v_proj_weight.shape[0], - bias=module.bias_v is not None - ), - (torch.empty((batch_size, module.v_proj_weight.shape[1])),) + nn.Linear(module.v_proj_weight.shape[1], module.v_proj_weight.shape[0], bias=module.bias_v is not None), + (torch.empty((batch_size, module.v_proj_weight.shape[1])),), ) # Q (L, B, embed_dim) --> (B * num_heads, L, head_dim=embed_dim / num_heads) diff --git a/torchscan/modules/macs.py b/torchscan/modules/macs.py index f1b66f7..78b448b 100644 --- a/torchscan/modules/macs.py +++ b/torchscan/modules/macs.py @@ -13,7 +13,7 @@ from torch.nn.modules.conv import _ConvNd, 
_ConvTransposeNd from torch.nn.modules.pooling import _AdaptiveAvgPoolNd, _AdaptiveMaxPoolNd, _AvgPoolNd, _MaxPoolNd -__all__ = ['module_macs'] +__all__ = ["module_macs"] def module_macs(module: Module, input: Tensor, output: Tensor) -> int: @@ -47,7 +47,7 @@ def module_macs(module: Module, input: Tensor, output: Tensor) -> int: elif isinstance(module, nn.Dropout): return 0 else: - warnings.warn(f'Module type not supported: {module.__class__.__name__}') + warnings.warn(f"Module type not supported: {module.__class__.__name__}") return 0 @@ -79,7 +79,7 @@ def macs_convnd(module: _ConvNd, input: Tensor, output: Tensor) -> int: # For each position, # mult = kernel size, # adds = kernel size - 1 window_macs_per_chan = reduce(mul, module.kernel_size) # Connections to input channels is controlled by the group parameter - effective_in_chan = (input.shape[1] // module.groups) + effective_in_chan = input.shape[1] // module.groups # N * mac window_mac = effective_in_chan * window_macs_per_chan conv_mac = output.numel() * window_mac @@ -99,14 +99,14 @@ def macs_bn(module: _BatchNorm, input: Tensor, output: Tensor) -> int: # Sum everything up bn_mac = input.numel() * (norm_mac + scale_mac) - # Count tracking stats update ops + # Count tracking stats update ops # cf. https://github.com/pytorch/pytorch/blob/master/torch/nn/modules/batchnorm.py#L94-L101 tracking_mac = 0 b = input.shape[0] - num_spatial_elts = input.shape[2:].numel() # type: ignore[attr-defined] + num_spatial_elts = input.shape[2:].numel() if module.track_running_stats and module.training: # running_mean: by channel, sum value and div by batch size - tracking_mac += module.num_features * (b * num_spatial_elts - 1) # type: ignore[operator, attr-defined] + tracking_mac += module.num_features * (b * num_spatial_elts - 1) # running_var: by channel, sub mean and square values, sum them, divide by batch size active_elts = b * num_spatial_elts tracking_mac += module.num_features * (2 * active_elts - 1) @@ -131,19 +131,17 @@ def macs_avgpool(module: _AvgPoolNd, input: Tensor, output: Tensor) -> int: k_size = reduce(mul, module.kernel_size) if isinstance(module.kernel_size, tuple) else module.kernel_size # for each spatial output element, sum elements in kernel scope and div by kernel size - return output.numel() * (k_size - 1 + input.ndim - 2) # type: ignore[attr-defined] + return output.numel() * (k_size - 1 + input.ndim - 2) def macs_adaptive_maxpool(module: _AdaptiveMaxPoolNd, input: Tensor, output: Tensor) -> int: """MACs estimation for `torch.nn.modules.pooling._AdaptiveMaxPoolNd`""" - if isinstance(module.output_size, tuple): - o_sizes = module.output_size - else: - o_sizes = (module.output_size,) * (input.ndim - 2) # type: ignore[attr-defined] - # Approximate kernel_size using ratio of spatial shapes between input and output - kernel_size = tuple(i_size // o_size if (i_size % o_size) == 0 else i_size - o_size * (i_size // o_size) + 1 - for i_size, o_size in zip(input.shape[2:], o_sizes)) + # Approximate kernel_size using ratio of spatial shapes between input and output + kernel_size = tuple( + i_size // o_size if (i_size % o_size) == 0 else i_size - o_size * (i_size // o_size) + 1 + for i_size, o_size in zip(input.shape[2:], output.shape[2:]) + ) # for each spatial output element, check max element in kernel scope return output.numel() * (reduce(mul, kernel_size) - 1) @@ -152,13 +150,11 @@ def macs_adaptive_maxpool(module: _AdaptiveMaxPoolNd, input: Tensor, output: Ten def macs_adaptive_avgpool(module: _AdaptiveAvgPoolNd, input: 
Tensor, output: Tensor) -> int: """MACs estimation for `torch.nn.modules.pooling._AdaptiveAvgPoolNd`""" - if isinstance(module.output_size, tuple): - o_sizes = module.output_size - else: - o_sizes = (module.output_size,) * (input.ndim - 2) # type: ignore[attr-defined] - # Approximate kernel_size using ratio of spatial shapes between input and output - kernel_size = tuple(i_size // o_size if (i_size % o_size) == 0 else i_size - o_size * (i_size // o_size) + 1 - for i_size, o_size in zip(input.shape[2:], o_sizes)) + # Approximate kernel_size using ratio of spatial shapes between input and output + kernel_size = tuple( + i_size // o_size if (i_size % o_size) == 0 else i_size - o_size * (i_size // o_size) + 1 + for i_size, o_size in zip(input.shape[2:], output.shape[2:]) + ) # for each spatial output element, sum elements in kernel scope and div by kernel size return output.numel() * (reduce(mul, kernel_size) - 1 + len(kernel_size)) diff --git a/torchscan/modules/memory.py b/torchscan/modules/memory.py index 81073e9..6e96478 100644 --- a/torchscan/modules/memory.py +++ b/torchscan/modules/memory.py @@ -14,7 +14,7 @@ from torch.nn.modules.conv import _ConvNd, _ConvTransposeNd from torch.nn.modules.pooling import _AdaptiveAvgPoolNd, _AdaptiveMaxPoolNd, _AvgPoolNd, _MaxPoolNd -__all__ = ['module_dmas'] +__all__ = ["module_dmas"] def module_dmas(module: Module, input: Tensor, output: Tensor) -> int: @@ -56,7 +56,7 @@ def module_dmas(module: Module, input: Tensor, output: Tensor) -> int: elif isinstance(module, nn.Dropout): return dmas_dropout(module, input, output) else: - warnings.warn(f'Module type not supported: {module.__class__.__name__}') + warnings.warn(f"Module type not supported: {module.__class__.__name__}") return 0 @@ -165,7 +165,7 @@ def dmas_convtransposend(module: _ConvTransposeNd, input: Tensor, output: Tensor def dmas_convnd(module: _ConvNd, input: Tensor, output: Tensor) -> int: """DMAs estimation for `torch.nn.modules.conv._ConvNd`""" - # Each output element required K ** 2 memory access of each input channel + # Each output element required K ** 2 memory access of each input channel input_dma = module.in_channels * reduce(mul, module.kernel_size) * output.numel() # Correct with groups input_dma //= module.groups @@ -203,13 +203,13 @@ def dmas_bn(module: _BatchNorm, input: Tensor, output: Tensor) -> int: def dmas_pool(module: Union[_MaxPoolNd, _AvgPoolNd], input: Tensor, output: Tensor) -> int: """DMAs estimation for spatial pooling modules""" - # Resolve kernel size and stride size (can be stored as a single integer or a tuple) + # Resolve kernel size and stride size (can be stored as a single integer or a tuple) if isinstance(module.kernel_size, tuple): kernel_size = module.kernel_size elif isinstance(module.kernel_size, int): - kernel_size = (module.kernel_size,) * (input.ndim - 2) # type: ignore[attr-defined] + kernel_size = (module.kernel_size,) * (input.ndim - 2) - # Each output element required K ** 2 memory accesses + # Each output element required K ** 2 memory accesses input_dma = reduce(mul, kernel_size) * output.numel() output_dma = output.numel() @@ -220,14 +220,12 @@ def dmas_pool(module: Union[_MaxPoolNd, _AvgPoolNd], input: Tensor, output: Tens def dmas_adaptive_pool(module: Union[_AdaptiveMaxPoolNd, _AdaptiveAvgPoolNd], input: Tensor, output: Tensor) -> int: """DMAs estimation for adaptive spatial pooling modules""" - if isinstance(module.output_size, tuple): - o_sizes = module.output_size - else: - o_sizes = (module.output_size,) * (input.ndim - 2) # type: 
ignore[attr-defined] - # Approximate kernel_size using ratio of spatial shapes between input and output - kernel_size = tuple(i_size // o_size if (i_size % o_size) == 0 else i_size - o_size * (i_size // o_size) + 1 - for i_size, o_size in zip(input.shape[2:], o_sizes)) - # Each output element required K ** 2 memory accesses + # Approximate kernel_size using ratio of spatial shapes between input and output + kernel_size = tuple( + i_size // o_size if (i_size % o_size) == 0 else i_size - o_size * (i_size // o_size) + 1 + for i_size, o_size in zip(input.shape[2:], output.shape[2:]) + ) + # Each output element required K ** 2 memory accesses input_dma = reduce(mul, kernel_size) * output.numel() output_dma = output.numel() diff --git a/torchscan/modules/receptive.py b/torchscan/modules/receptive.py index 1fe6e1b..046c6d2 100644 --- a/torchscan/modules/receptive.py +++ b/torchscan/modules/receptive.py @@ -13,7 +13,7 @@ from torch.nn.modules.conv import _ConvNd, _ConvTransposeNd from torch.nn.modules.pooling import _AdaptiveAvgPoolNd, _AdaptiveMaxPoolNd, _AvgPoolNd, _MaxPoolNd -__all__ = ['module_rf'] +__all__ = ["module_rf"] def module_rf(module: Module, input: Tensor, output: Tensor) -> Tuple[float, float, float]: @@ -28,9 +28,23 @@ def module_rf(module: Module, input: Tensor, output: Tensor) -> Tuple[float, flo effective stride effective padding """ - if isinstance(module, (nn.Identity, nn.Flatten, nn.ReLU, nn.ELU, nn.LeakyReLU, nn.ReLU6, nn.Tanh, nn.Sigmoid, - _BatchNorm, nn.Dropout, nn.Linear)): - return 1., 1., 0. + if isinstance( + module, + ( + nn.Identity, + nn.Flatten, + nn.ReLU, + nn.ELU, + nn.LeakyReLU, + nn.ReLU6, + nn.Tanh, + nn.Sigmoid, + _BatchNorm, + nn.Dropout, + nn.Linear, + ), + ): + return 1.0, 1.0, 0.0 elif isinstance(module, _ConvTransposeNd): return rf_convtransposend(module, input, output) elif isinstance(module, (_ConvNd, _MaxPoolNd, _AvgPoolNd)): @@ -38,23 +52,21 @@ def module_rf(module: Module, input: Tensor, output: Tensor) -> Tuple[float, flo elif isinstance(module, (_AdaptiveMaxPoolNd, _AdaptiveAvgPoolNd)): return rf_adaptive_poolnd(module, input, output) else: - warnings.warn(f'Module type not supported: {module.__class__.__name__}') - return 1., 1., 0. + warnings.warn(f"Module type not supported: {module.__class__.__name__}") + return 1.0, 1.0, 0.0 def rf_convtransposend(module: _ConvTransposeNd, intput: Tensor, output: Tensor) -> Tuple[float, float, float]: k = module.kernel_size[0] if isinstance(module.kernel_size, tuple) else module.kernel_size s = module.stride[0] if isinstance(module.stride, tuple) else module.stride - return -k, 1. / s, 0. 
# type: ignore[operator] + return -k, 1.0 / s, 0.0 def rf_aggregnd( - module: Union[_ConvNd, _MaxPoolNd, _AvgPoolNd], - input: Tensor, - output: Tensor + module: Union[_ConvNd, _MaxPoolNd, _AvgPoolNd], input: Tensor, output: Tensor ) -> Tuple[float, float, float]: k = module.kernel_size[0] if isinstance(module.kernel_size, tuple) else module.kernel_size - if hasattr(module, 'dilation'): + if hasattr(module, "dilation"): d = module.dilation[0] if isinstance(module.dilation, tuple) else module.dilation k = d * (k - 1) + 1 # type: ignore[operator] s = module.stride[0] if isinstance(module.stride, tuple) else module.stride @@ -63,9 +75,7 @@ def rf_aggregnd( def rf_adaptive_poolnd( - module: Union[_AdaptiveMaxPoolNd, _AdaptiveAvgPoolNd], - input: Tensor, - output: Tensor + module: Union[_AdaptiveMaxPoolNd, _AdaptiveAvgPoolNd], input: Tensor, output: Tensor ) -> Tuple[int, int, float]: stride = math.ceil(input.shape[-1] / output.shape[-1]) diff --git a/torchscan/process/memory.py b/torchscan/process/memory.py index e96b9ca..83c4e25 100644 --- a/torchscan/process/memory.py +++ b/torchscan/process/memory.py @@ -7,7 +7,7 @@ import subprocess import warnings -__all__ = ['get_process_gpu_ram'] +__all__ = ["get_process_gpu_ram"] def get_process_gpu_ram(pid: int) -> float: @@ -31,4 +31,4 @@ def get_process_gpu_ram(pid: int) -> float: warnings.warn(f"raised: {e}. Assuming no GPU is available.") # Otherwise assume the process is running exclusively on CPU - return 0. + return 0.0 diff --git a/torchscan/utils.py b/torchscan/utils.py index 93d6f3c..0670710 100644 --- a/torchscan/utils.py +++ b/torchscan/utils.py @@ -24,7 +24,7 @@ def format_name(name: str, depth: int = 0) -> str: return f"{'| ' * (depth - 1)}└─{name}" -def wrap_string(s: str, max_len: int, delimiter: str = '.', wrap: str = '[...]', mode: str = 'end') -> str: +def wrap_string(s: str, max_len: int, delimiter: str = ".", wrap: str = "[...]", mode: str = "end") -> str: """Wrap a string into a given length Args: @@ -40,12 +40,12 @@ def wrap_string(s: str, max_len: int, delimiter: str = '.', wrap: str = '[...]', if len(s) <= max_len or mode is None: return s - if mode == 'end': - return s[:max_len - len(wrap)] + wrap - elif mode == 'mid': + if mode == "end": + return s[: max_len - len(wrap)] + wrap + elif mode == "mid": final_part = s.rpartition(delimiter)[-1] wrapped_end = f"{wrap}.{final_part}" - return s[:max_len - len(wrapped_end)] + wrapped_end + return s[: max_len - len(wrapped_end)] + wrapped_end else: raise ValueError("received an unexpected value of argument `mode`") @@ -60,15 +60,15 @@ def unit_scale(val: float) -> Tuple[float, str]: """ if val // 1e12 > 0: - return val / 1e12, 'T' + return val / 1e12, "T" elif val // 1e9 > 0: - return val / 1e9, 'G' + return val / 1e9, "G" elif val // 1e6 > 0: - return val / 1e6, 'M' + return val / 1e6, "M" elif val // 1e3 > 0: - return val / 1e3, 'k' + return val / 1e3, "k" else: - return val, '' + return val, "" def format_s(f_string, min_w: Optional[int] = None, max_w: Optional[int] = None) -> str: @@ -83,21 +83,23 @@ def format_s(f_string, min_w: Optional[int] = None, max_w: Optional[int] = None) def format_line_str( layer: Dict[str, Any], col_w: Optional[List[int]] = None, - wrap_mode: str = 'mid', + wrap_mode: str = "mid", receptive_field: bool = False, - effective_rf_stats: bool = False + effective_rf_stats: bool = False, ) -> List[str]: if not isinstance(col_w, list): col_w = [None] * 7 # type: ignore[list-item] max_len = col_w[0] + 3 if isinstance(col_w[0], int) else 100 - line_str = 
[format_s(wrap_string(format_name(layer['name'], layer['depth']), max_len, mode=wrap_mode), - col_w[0], col_w[0])] - line_str.append(format_s(layer['type'], col_w[1], col_w[1])) - line_str.append(format_s(str(layer['output_shape']), col_w[2], col_w[2])) - line_str.append(format_s(f"{layer['grad_params'] + layer['nograd_params'] + layer['num_buffers']:,}", - col_w[3], col_w[3])) + line_str = [ + format_s(wrap_string(format_name(layer["name"], layer["depth"]), max_len, mode=wrap_mode), col_w[0], col_w[0]) + ] + line_str.append(format_s(layer["type"], col_w[1], col_w[1])) + line_str.append(format_s(str(layer["output_shape"]), col_w[2], col_w[2])) + line_str.append( + format_s(f"{layer['grad_params'] + layer['nograd_params'] + layer['num_buffers']:,}", col_w[3], col_w[3]) + ) if receptive_field: line_str.append(format_s(f"{layer['rf']:.0f}", col_w[4], col_w[4])) @@ -109,10 +111,7 @@ def format_line_str( def format_info( - module_info: Dict[str, Any], - wrap_mode: str = 'mid', - receptive_field: bool = False, - effective_rf_stats: bool = False + module_info: Dict[str, Any], wrap_mode: str = "mid", receptive_field: bool = False, effective_rf_stats: bool = False ) -> str: """Print module summary for an expected input tensor shape @@ -128,14 +127,18 @@ def format_info( # Set margin between cols margin = 4 # Dynamic col width - # Init with headers - headers = ['Layer', 'Type', 'Output Shape', 'Param #', 'Receptive field', 'Effective stride', 'Effective padding'] + # Init with headers + headers = ["Layer", "Type", "Output Shape", "Param #", "Receptive field", "Effective stride", "Effective padding"] max_w = [27, 20, 25, 15, 15, 16, 17] col_w = [len(s) for s in headers] - for layer in module_info['layers']: - col_w = [max(v, len(s)) - for v, s in zip(col_w, format_line_str(layer, col_w=None, wrap_mode=wrap_mode, - receptive_field=True, effective_rf_stats=True))] + for layer in module_info["layers"]: + col_w = [ + max(v, len(s)) + for v, s in zip( + col_w, + format_line_str(layer, col_w=None, wrap_mode=wrap_mode, receptive_field=True, effective_rf_stats=True), + ) + ] # Truncate columns that are too long col_w = [min(v, max_v) for v, max_v in zip(col_w, max_w)] @@ -149,11 +152,11 @@ def format_info( # Define separating lines line_length = sum(col_w) + (len(col_w) - 1) * margin - thin_line = '_' * line_length - thick_line = '=' * line_length - dot_line = '-' * line_length + thin_line = "_" * line_length + thick_line = "=" * line_length + dot_line = "-" * line_length - margin_str = ' ' * margin + margin_str = " " * margin # Header info_str = [thin_line] @@ -161,24 +164,24 @@ def format_info( info_str.append(thick_line) # Layers - for layer in module_info['layers']: + for layer in module_info["layers"]: line_str = format_line_str(layer, col_w, wrap_mode, receptive_field, effective_rf_stats) - info_str.append((' ' * margin).join(line_str)) + info_str.append((" " * margin).join(line_str)) # Parameter information info_str.append(thick_line) info_str.append(f"Trainable params: {module_info['overall']['grad_params']:,}") info_str.append(f"Non-trainable params: {module_info['overall']['nograd_params']:,}") - num_params = module_info['overall']['grad_params'] + module_info['overall']['nograd_params'] + num_params = module_info["overall"]["grad_params"] + module_info["overall"]["nograd_params"] info_str.append(f"Total params: {num_params:,}") # Static RAM usage info_str.append(dot_line) # Convert to Megabytes - param_size = (module_info['overall']['param_size'] + module_info['overall']['buffer_size']) / 1024 ** 
2 - overhead = module_info['overheads']['framework']['fwd'] + module_info['overheads']['cuda']['fwd'] + param_size = (module_info["overall"]["param_size"] + module_info["overall"]["buffer_size"]) / 1024**2 + overhead = module_info["overheads"]["framework"]["fwd"] + module_info["overheads"]["cuda"]["fwd"] info_str.append(f"Model size (params + buffers): {param_size:.2f} Mb") info_str.append(f"Framework & CUDA overhead: {overhead:.2f} Mb") @@ -187,9 +190,9 @@ def format_info( # FLOPS information info_str.append(dot_line) - flops, flops_units = unit_scale(sum(layer['flops'] for layer in module_info['layers'])) - macs, macs_units = unit_scale(sum(layer['macs'] for layer in module_info['layers'])) - dmas, dmas_units = unit_scale(sum(layer['dmas'] for layer in module_info['layers'])) + flops, flops_units = unit_scale(sum(layer["flops"] for layer in module_info["layers"])) + macs, macs_units = unit_scale(sum(layer["macs"] for layer in module_info["layers"])) + dmas, dmas_units = unit_scale(sum(layer["dmas"] for layer in module_info["layers"])) info_str.append(f"Floating Point Operations on forward: {flops:.2f} {flops_units}FLOPs") info_str.append(f"Multiply-Accumulations on forward: {macs:.2f} {macs_units}MACs") @@ -197,7 +200,7 @@ def format_info( info_str.append(thin_line) - return '\n'.join(info_str) + return "\n".join(info_str) def aggregate_info(info: Dict[str, Any], max_depth: int) -> Dict[str, Any]: @@ -210,44 +213,44 @@ def aggregate_info(info: Dict[str, Any], max_depth: int) -> Dict[str, Any]: edited dictionary information """ - if not any(layer['depth'] == max_depth for layer in info['layers']): + if not any(layer["depth"] == max_depth for layer in info["layers"]): raise ValueError("The `max_depth` argument cannot be higher than module depth.") - for fw_idx, layer in enumerate(info['layers']): - # Need to aggregate information - if not layer['is_leaf'] and layer['depth'] == max_depth: + for fw_idx, layer in enumerate(info["layers"]): + # Need to aggregate information + if not layer["is_leaf"] and layer["depth"] == max_depth: grad_p, nograd_p, p_size, num_buffers, b_size = 0, 0, 0, 0, 0 flops, macs, dmas = 0, 0, 0 - for _layer in info['layers'][fw_idx + 1:]: + for _layer in info["layers"][fw_idx + 1 :]: # Children have superior depth and were hooked after parent - if _layer['depth'] <= max_depth: + if _layer["depth"] <= max_depth: break # Aggregate all information (flops, macc, ram) - flops += _layer['flops'] - macs += _layer['macs'] - dmas += _layer['dmas'] - grad_p += _layer['grad_params'] - nograd_p += _layer['nograd_params'] - p_size += _layer['param_size'] - num_buffers += _layer['num_buffers'] - b_size += _layer['buffer_size'] + flops += _layer["flops"] + macs += _layer["macs"] + dmas += _layer["dmas"] + grad_p += _layer["grad_params"] + nograd_p += _layer["nograd_params"] + p_size += _layer["param_size"] + num_buffers += _layer["num_buffers"] + b_size += _layer["buffer_size"] # Take last child effective RF - _rf, _s, _p = _layer['rf'], _layer['s'], _layer['p'] + _rf, _s, _p = _layer["rf"], _layer["s"], _layer["p"] # Update info - info['layers'][fw_idx]['flops'] = flops - info['layers'][fw_idx]['macs'] = macs - info['layers'][fw_idx]['dmas'] = dmas - info['layers'][fw_idx]['rf'] = _rf - info['layers'][fw_idx]['s'] = _s - info['layers'][fw_idx]['p'] = _p - info['layers'][fw_idx]['grad_params'] = grad_p - info['layers'][fw_idx]['nograd_params'] = nograd_p - info['layers'][fw_idx]['param_size'] = p_size - info['layers'][fw_idx]['num_buffers'] = num_buffers - 
info['layers'][fw_idx]['buffer_size'] = b_size + info["layers"][fw_idx]["flops"] = flops + info["layers"][fw_idx]["macs"] = macs + info["layers"][fw_idx]["dmas"] = dmas + info["layers"][fw_idx]["rf"] = _rf + info["layers"][fw_idx]["s"] = _s + info["layers"][fw_idx]["p"] = _p + info["layers"][fw_idx]["grad_params"] = grad_p + info["layers"][fw_idx]["nograd_params"] = nograd_p + info["layers"][fw_idx]["param_size"] = p_size + info["layers"][fw_idx]["num_buffers"] = num_buffers + info["layers"][fw_idx]["buffer_size"] = b_size # Filter out further depth information - info['layers'] = [layer for layer in info['layers'] if layer['depth'] <= max_depth] + info["layers"] = [layer for layer in info["layers"] if layer["depth"] <= max_depth] return info
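
The adaptive pooling hooks above approximate the kernel size from the ratio of input to output spatial sizes, then charge each output element (prod(kernel) - 1) additions plus one division per spatial dimension. A self-contained sketch of that estimate, using arbitrary shapes rather than anything from this patch:

    from functools import reduce
    from operator import mul

    import torch
    import torch.nn as nn

    pool = nn.AdaptiveAvgPool2d(7)
    x = torch.rand((1, 512, 17, 14))   # arbitrary input shape
    out = pool(x)

    # Same per-dimension approximation as flops/macs/dmas_adaptive_*pool
    kernel_size = tuple(
        i // o if i % o == 0 else i - o * (i // o) + 1
        for i, o in zip(x.shape[2:], out.shape[2:])
    )
    flops = out.numel() * (reduce(mul, kernel_size) - 1 + len(kernel_size))
    print(kernel_size, flops)  # -> (4, 2) 225792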
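
For the receptive-field statistics, aggregating modules report an effective kernel that accounts for dilation (k_eff = d * (k - 1) + 1) together with their stride, while transposed convolutions return (-k, 1/s, 0) to encode upsampling. A minimal sketch of the kernel/stride part only (padding is handled analogously inside rf_aggregnd); the layers below are arbitrary examples:

    import torch.nn as nn

    def effective_k_s(module):
        # Kernel size and stride may be stored as a single int or a tuple
        k = module.kernel_size[0] if isinstance(module.kernel_size, tuple) else module.kernel_size
        if hasattr(module, "dilation"):
            d = module.dilation[0] if isinstance(module.dilation, tuple) else module.dilation
            k = d * (k - 1) + 1  # dilated kernel extent
        s = module.stride[0] if isinstance(module.stride, tuple) else module.stride
        return k, s

    print(effective_k_s(nn.Conv2d(3, 16, kernel_size=3, stride=2, dilation=2)))  # -> (5, 2)
    print(effective_k_s(nn.MaxPool2d(kernel_size=2)))                            # -> (2, 2)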
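
The summary table truncates long layer names with wrap_string; in "end" mode the tail is cut off, while "mid" mode keeps the last dotted component and elides the middle. A short usage sketch with an arbitrary layer name:

    from torchscan.utils import wrap_string

    name = "features.denseblock1.denselayer12.conv1"   # arbitrary example
    print(wrap_string(name, 25, mode="end"))  # -> features.denseblock1[...]
    print(wrap_string(name, 25, mode="mid"))  # -> features.dense[...].conv1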
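
aggregate_info folds the counts of all children into the first non-leaf layer sitting exactly at max_depth, relying on the fact that children are hooked right after their parent and therefore appear consecutively in the layer list. A toy, self-contained sketch of that aggregation for the FLOP counter only, with made-up layer names and values:

    layers = [
        {"name": "features", "depth": 1, "is_leaf": False, "flops": 0},
        {"name": "features.conv1", "depth": 2, "is_leaf": True, "flops": 1_000},
        {"name": "features.bn1", "depth": 2, "is_leaf": True, "flops": 200},
        {"name": "classifier", "depth": 1, "is_leaf": True, "flops": 500},
    ]
    max_depth = 1

    for fw_idx, layer in enumerate(layers):
        if not layer["is_leaf"] and layer["depth"] == max_depth:
            flops = 0
            for _layer in layers[fw_idx + 1:]:
                # A sibling or shallower module ends the run of children
                if _layer["depth"] <= max_depth:
                    break
                flops += _layer["flops"]
            layers[fw_idx]["flops"] = flops

    # Drop anything deeper than max_depth, as aggregate_info does
    layers = [layer for layer in layers if layer["depth"] <= max_depth]
    print([(layer["name"], layer["flops"]) for layer in layers])
    # -> [('features', 1200), ('classifier', 500)]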