feat(//tools/perf): Refactor perf_run.py, add fx2trt backend support, usage via CLI arguments #1254
Conversation
Signed-off-by: dperi <dperi@nvidia.com>
…CLI arguments
Signed-off-by: Dheeraj Peri <peri.dheeraj@gmail.com>
Signed-off-by: Dheeraj Peri <peri.dheeraj@gmail.com>
Signed-off-by: Dheeraj Peri <peri.dheeraj@gmail.com>
Signed-off-by: Dheeraj Peri <peri.dheeraj@gmail.com>
Signed-off-by: Dheeraj Peri <peri.dheeraj@gmail.com>
Signed-off-by: Dheeraj Peri <peri.dheeraj@gmail.com>
Signed-off-by: Dheeraj Peri <peri.dheeraj@gmail.com>
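For context, this change exposes the benchmark runner directly through CLI arguments in addition to the YAML config path. A hypothetical invocation, assembled from the argparse flags visible in the style-check diff below (the model name, input shape, and report path are placeholders, not values taken from this PR):

python perf_run.py --backends torch,torch_tensorrt,tensorrt --model resnet50 --inputs "(1, 3, 224, 224)@fp32" --batch_size 1 --precision fp32,fp16 --report perf_report.txt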
Code conforms to C++ style guidelines
There are some changes that do not conform to Python style guidelines:
--- tools/perf/custom_models.py 2022-08-11 18:17:06.779492 +0000
+++ tools/perf/custom_models.py 2022-08-11 18:20:27.317117 +0000
@@ -1,9 +1,10 @@
import torch
import torch.nn as nn
from transformers import BertModel, BertTokenizer, BertConfig
import torch.nn.functional as F
+
def BertModule():
model_name = "bert-base-uncased"
enc = BertTokenizer.from_pretrained(model_name)
text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
--- tools/perf/hub.py 2022-08-11 18:17:06.779492 +0000
+++ tools/perf/hub.py 2022-08-11 18:20:27.448176 +0000
@@ -15,40 +15,25 @@
# Detect case of no GPU before deserialization of models on GPU
if not torch.cuda.is_available():
raise Exception("No GPU found. Please check if installed torch version is compatible with CUDA version")
# Downloads all model files again if manifest file is not present
-MANIFEST_FILE = 'model_manifest.json'
+MANIFEST_FILE = "model_manifest.json"
BENCHMARK_MODELS = {
- "vgg16": {
- "model": models.vgg16(weights=None),
- "path": "script"
- },
- "resnet50": {
- "model": models.resnet50(weights=None),
- "path": "script"
- },
- "efficientnet_b0": {
- "model": timm.create_model('efficientnet_b0', pretrained=True),
- "path": "script"
- },
- "vit": {
- "model": timm.create_model('vit_base_patch16_224', pretrained=True),
- "path": "script"
- },
- "bert_base_uncased": {
- "model": cm.BertModule(),
- "path": "trace"
- },
+ "vgg16": {"model": models.vgg16(weights=None), "path": "script"},
+ "resnet50": {"model": models.resnet50(weights=None), "path": "script"},
+ "efficientnet_b0": {"model": timm.create_model("efficientnet_b0", pretrained=True), "path": "script"},
+ "vit": {"model": timm.create_model("vit_base_patch16_224", pretrained=True), "path": "script"},
+ "bert_base_uncased": {"model": cm.BertModule(), "path": "trace"},
}
def get(n, m, manifest):
print("Downloading {}".format(n))
- traced_filename = "models/" + n + '_traced.jit.pt'
- script_filename = "models/" + n + '_scripted.jit.pt'
+ traced_filename = "models/" + n + "_traced.jit.pt"
+ script_filename = "models/" + n + "_scripted.jit.pt"
x = torch.ones((1, 3, 300, 300)).cuda()
if n == "bert-base-uncased":
traced_model = m["model"]
torch.jit.save(traced_model, traced_filename)
manifest.update({n: [traced_filename]})
@@ -78,13 +63,15 @@
else:
for n, m in BENCHMARK_MODELS.items():
scripted_filename = "models/" + n + "_scripted.jit.pt"
traced_filename = "models/" + n + "_traced.jit.pt"
# Check if model file exists on disk
- if (m["path"] == "both" and os.path.exists(scripted_filename) and os.path.exists(traced_filename)) or \
- (m["path"] == "script" and os.path.exists(scripted_filename)) or \
- (m["path"] == "trace" and os.path.exists(traced_filename)):
+ if (
+ (m["path"] == "both" and os.path.exists(scripted_filename) and os.path.exists(traced_filename))
+ or (m["path"] == "script" and os.path.exists(scripted_filename))
+ or (m["path"] == "trace" and os.path.exists(traced_filename))
+ ):
print("Skipping {} ".format(n))
continue
manifest = get(n, m, manifest)
@@ -96,31 +83,35 @@
# Check if Manifest file exists or is empty
if not os.path.exists(MANIFEST_FILE) or os.stat(MANIFEST_FILE).st_size == 0:
manifest = {"version": torch_version}
# Creating an empty manifest file for overwriting post setup
- os.system('touch {}'.format(MANIFEST_FILE))
+ os.system("touch {}".format(MANIFEST_FILE))
else:
manifest_exists = True
# Load manifest if already exists
- with open(MANIFEST_FILE, 'r') as f:
+ with open(MANIFEST_FILE, "r") as f:
manifest = json.load(f)
- if manifest['version'] == torch_version:
+ if manifest["version"] == torch_version:
version_matches = True
else:
- print("Torch version: {} mismatches \
+ print(
+ "Torch version: {} mismatches \
with manifest's version: {}. Re-downloading \
- all models".format(torch_version, manifest['version']))
+ all models".format(
+ torch_version, manifest["version"]
+ )
+ )
# Overwrite the manifest version as current torch version
- manifest['version'] = torch_version
+ manifest["version"] = torch_version
download_models(version_matches, manifest)
# Write updated manifest file to disk
- with open(MANIFEST_FILE, 'r+') as f:
+ with open(MANIFEST_FILE, "r+") as f:
data = f.read()
f.seek(0)
record = json.dumps(manifest)
f.write(record)
f.truncate()
--- tools/perf/utils.py 2022-08-11 18:17:06.779492 +0000
+++ tools/perf/utils.py 2022-08-11 18:20:27.479495 +0000
@@ -3,57 +3,46 @@
import custom_models as cm
import torchvision.models as models
import timm
BENCHMARK_MODELS = {
- "vgg16": {
- "model": models.vgg16(pretrained=True),
- "path": "script"
- },
- "resnet50": {
- "model": torch.hub.load('pytorch/vision:v0.9.0', 'resnet50', pretrained=True),
- "path": "script"
- },
- "efficientnet_b0": {
- "model": timm.create_model('efficientnet_b0', pretrained=True),
- "path": "script"
- },
- "vit": {
- "model": timm.create_model('vit_base_patch16_224', pretrained=True),
- "path": "script"
- },
- "bert_base_uncased": {
- "model": cm.BertModule(),
- "path": "trace"
- },
+ "vgg16": {"model": models.vgg16(pretrained=True), "path": "script"},
+ "resnet50": {"model": torch.hub.load("pytorch/vision:v0.9.0", "resnet50", pretrained=True), "path": "script"},
+ "efficientnet_b0": {"model": timm.create_model("efficientnet_b0", pretrained=True), "path": "script"},
+ "vit": {"model": timm.create_model("vit_base_patch16_224", pretrained=True), "path": "script"},
+ "bert_base_uncased": {"model": cm.BertModule(), "path": "trace"},
}
+
def precision_to_dtype(pr):
- if pr == 'fp32':
+ if pr == "fp32":
return torch.float
- elif pr == 'fp16' or pr == 'half':
+ elif pr == "fp16" or pr == "half":
return torch.half
- elif pr == 'int32':
+ elif pr == "int32":
return torch.int32
- elif pr == 'bool':
+ elif pr == "bool":
return torch.bool
else:
return torch.float32
+
def parse_inputs(user_inputs, dtype):
- parsed_inputs = user_inputs.split(';')
+ parsed_inputs = user_inputs.split(";")
torchtrt_inputs = []
for input in parsed_inputs:
input_shape = []
- input_shape_and_dtype = input.split('@')
+ input_shape_and_dtype = input.split("@")
dtype = precision_to_dtype(input_shape_and_dtype[1]) if len(input_shape_and_dtype) == 2 else dtype
- for input_dim in input_shape_and_dtype[0][1:-1].split(','):
+ for input_dim in input_shape_and_dtype[0][1:-1].split(","):
input_shape.append(int(input_dim))
torchtrt_inputs.append(torch.randint(0, 5, input_shape, dtype=dtype).cuda())
return torchtrt_inputs
+
def parse_backends(backends):
- return backends.split(',')
+ return backends.split(",")
+
def parse_precisions(precisions):
- return precisions.split(',')
+ return precisions.split(",")
--- tools/perf/perf_run.py 2022-08-11 18:17:06.779492 +0000
+++ tools/perf/perf_run.py 2022-08-11 18:20:27.591280 +0000
@@ -42,18 +42,21 @@
# Retrieves the value from the configuration else uses default values
def get(self, key, default_value=None):
if not key in self.params:
if not default_value:
- raise ValueError('Key {} is not present and default_value is not configured. Please run it with default value', key)
+ raise ValueError(
+ "Key {} is not present and default_value is not configured. Please run it with default value", key
+ )
self.params[key] = default_value
return self.params[key]
+
# Runs inference using Torch backend
def run_torch(model, input_tensors, params, precision, batch_size):
print("Running Torch for precision: ", precision, " batch_size : ", batch_size)
- iters = params.get('iterations', 20)
+ iters = params.get("iterations", 20)
# Warm up
with torch.no_grad():
for _ in range(WARMUP_ITER):
features = model(*input_tensors)
@@ -69,29 +72,30 @@
end_time = timeit.default_timer()
meas_time = end_time - start_time
timings.append(meas_time)
recordStats("Torch", timings, precision, batch_size)
+
# Runs inference using Torch-TensorRT backend
def run_torch_tensorrt(model, input_tensors, params, precision, truncate_long_and_double, batch_size):
print("Running Torch-TensorRT for precision: ", precision, " batch_size : ", batch_size)
# Compiling Torch-TensorRT model
compile_settings = {
- "inputs": input_tensors,
- "enabled_precisions": {precision_to_dtype(precision)} ,
- "truncate_long_and_double": truncate_long_and_double,
- "min_block_size" : 1,
+ "inputs": input_tensors,
+ "enabled_precisions": {precision_to_dtype(precision)},
+ "truncate_long_and_double": truncate_long_and_double,
+ "min_block_size": 1,
}
- if precision == 'int8':
- compile_settings.update({"calib": params.get('calibration_cache')})
+ if precision == "int8":
+ compile_settings.update({"calib": params.get("calibration_cache")})
with torchtrt.logging.errors():
model = torchtrt.compile(model, **compile_settings)
- iters = params.get('iterations', 20)
+ iters = params.get("iterations", 20)
# Warm up
with torch.no_grad():
for _ in range(WARMUP_ITER):
features = model(*input_tensors)
@@ -106,10 +110,11 @@
end_time = timeit.default_timer()
meas_time = end_time - start_time
timings.append(meas_time)
recordStats("Torch-TensorRT", timings, precision, batch_size)
+
# Runs inference using FX2TRT backend
def run_fx2trt(model, input_tensors, params, precision, batch_size):
print("Running FX2TRT for precision: ", precision, " batch_size : ", batch_size)
if precision == "fp32":
@@ -125,11 +130,11 @@
max_batch_size=batch_size,
lower_precision=precision,
verbose_log=False,
)
- iters = params.get('iterations', 20)
+ iters = params.get("iterations", 20)
# Warm up
with torch.no_grad():
for _ in range(WARMUP_ITER):
features = model(*input_tensors)
@@ -144,10 +149,11 @@
end_time = timeit.default_timer()
meas_time = end_time - start_time
timings.append(meas_time)
recordStats("FX-TensorRT", timings, precision, batch_size)
+
def torch_dtype_from_trt(dtype):
if dtype == trt.int8:
return torch.int8
elif dtype == trt.bool:
@@ -159,20 +165,23 @@
elif dtype == trt.float32:
return torch.float32
else:
raise TypeError("%s is not supported by torch" % dtype)
+
def torch_device_from_trt(device):
if device == trt.TensorLocation.DEVICE:
return torch.device("cuda")
elif device == trt.TensorLocation.HOST:
return torch.device("cpu")
else:
return TypeError("%s is not supported by torch" % device)
-def run_tensorrt(model, input_tensors, params, precision, truncate_long_and_double=False, is_trt_engine=False, batch_size=1):
+def run_tensorrt(
+ model, input_tensors, params, precision, truncate_long_and_double=False, is_trt_engine=False, batch_size=1
+):
engine = None
# If the model file is a TensorRT engine then directly deserialize and run inference
# else convert the torch module to a TensorRT engine first and then run inference
if not is_trt_engine:
@@ -189,16 +198,16 @@
# Deserialize the TensorRT engine
with trt.Logger() as logger, trt.Runtime(logger) as runtime:
engine = runtime.deserialize_cuda_engine(model)
print("Running TensorRT for precision: ", precision, " batch_size : ", batch_size)
- iters = params.get('iterations', 20)
+ iters = params.get("iterations", 20)
# Compiling the bindings
bindings = engine.num_bindings * [None]
k = 0
- for idx,_ in enumerate(bindings):
+ for idx, _ in enumerate(bindings):
dtype = torch_dtype_from_trt(engine.get_binding_dtype(idx))
shape = tuple(engine.get_binding_shape(idx))
device = torch_device_from_trt(engine.get_location(idx))
if not engine.binding_is_input(idx):
# Output bindings
@@ -223,23 +232,26 @@
meas_time = end_time - start_time
timings.append(meas_time)
recordStats("TensorRT", timings, precision, batch_size)
+
# Deploys inference run for different backend configurations
-def run(model, backends, input_tensors, params, precision, truncate_long_and_double=False, batch_size=1, is_trt_engine=False):
+def run(
+ model, backends, input_tensors, params, precision, truncate_long_and_double=False, batch_size=1, is_trt_engine=False
+):
for backend in backends:
- if precision == 'int8':
- if backend == 'all' or backend == 'torch':
+ if precision == "int8":
+ if backend == "all" or backend == "torch":
print("int8 precision is not supported for torch runtime in this script yet")
return False
- if backend == 'all' or backend == 'torch_tensorrt' or params.get('calibration_cache', None) == None:
+ if backend == "all" or backend == "torch_tensorrt" or params.get("calibration_cache", None) == None:
print("int8 precision expects calibration cache file for inference")
return False
- if backend == 'all':
+ if backend == "all":
run_torch(model, input_tensors, params, precision, batch_size)
run_torch_tensorrt(model, input_tensors, params, precision, truncate_long_and_double, batch_size)
run_tensorrt(model, input_tensors, params, precision, truncate_long_and_double, is_trt_engine, batch_size)
elif backend == "torch":
@@ -252,12 +264,13 @@
run_fx2trt(model, input_tensors, params, precision, batch_size)
elif backend == "tensorrt":
run_tensorrt(model, input_tensors, params, precision, truncate_long_and_double, is_trt_engine, batch_size)
+
# Generate report
-def recordStats(backend, timings, precision, batch_size = 1):
+def recordStats(backend, timings, precision, batch_size=1):
times = np.array(timings)
steps = len(times)
speeds = batch_size / times
time_mean = np.mean(times)
time_med = np.median(times)
@@ -265,55 +278,77 @@
time_std = np.std(times, ddof=0)
speed_mean = np.mean(speeds)
speed_med = np.median(speeds)
stats = {
- 'Backend' : backend,
- 'Precision' : precision,
- 'Batch size' : batch_size,
- 'Median(FPS)' : speed_med,
- 'Mean(FPS)' : speed_mean,
- 'Median-Latency(ms)' : time_med,
- 'Mean-Latency(ms)' : time_mean,
+ "Backend": backend,
+ "Precision": precision,
+ "Batch size": batch_size,
+ "Median(FPS)": speed_med,
+ "Mean(FPS)": speed_mean,
+ "Median-Latency(ms)": time_med,
+ "Mean-Latency(ms)": time_mean,
}
results.append(stats)
+
def load_model(params):
model = None
is_trt_engine = False
# Load torch model traced/scripted
- model_file = params.get('model').get('filename')
- try :
- model_name = params.get('model').get('name')
+ model_file = params.get("model").get("filename")
+ try:
+ model_name = params.get("model").get("name")
except:
model_name = model_file
print("Loading model: ", model_file)
- if model_file.endswith('.plan'):
+ if model_file.endswith(".plan"):
is_trt_engine = True
# Read the TensorRT engine file
- with open(model_file, 'rb') as fin:
+ with open(model_file, "rb") as fin:
model = fin.read()
else:
model = torch.jit.load(model_file).cuda()
return model, model_name, is_trt_engine
-if __name__ == '__main__':
+if __name__ == "__main__":
arg_parser = argparse.ArgumentParser(description="Run inference on a model with random input values")
- arg_parser.add_argument("--config", type=str, help="Load YAML based configuration file to run the inference. If this is used other params will be ignored")
+ arg_parser.add_argument(
+ "--config",
+ type=str,
+ help="Load YAML based configuration file to run the inference. If this is used other params will be ignored",
+ )
# The following options are manual user provided settings
- arg_parser.add_argument("--backends", type=str, help="Comma separated string of backends. Eg: torch,torch_tensorrt,tensorrt")
+ arg_parser.add_argument(
+ "--backends", type=str, help="Comma separated string of backends. Eg: torch,torch_tensorrt,tensorrt"
+ )
arg_parser.add_argument("--model", type=str, help="Name of the model file")
- arg_parser.add_argument("--inputs", type=str, help="List of input shapes. Eg: (1, 3, 224, 224)@fp32 for Resnet or (1, 128)@int32;(1, 128)@int32 for BERT")
+ arg_parser.add_argument(
+ "--inputs",
+ type=str,
+ help="List of input shapes. Eg: (1, 3, 224, 224)@fp32 for Resnet or (1, 128)@int32;(1, 128)@int32 for BERT",
+ )
arg_parser.add_argument("--batch_size", type=int, default=1, help="Batch size to build and run")
- arg_parser.add_argument("--precision", default="fp32", type=str, help="Comma separated list of precisions to build TensorRT engine Eg: fp32,fp16")
+ arg_parser.add_argument(
+ "--precision",
+ default="fp32",
+ type=str,
+ help="Comma separated list of precisions to build TensorRT engine Eg: fp32,fp16",
+ )
arg_parser.add_argument("--calibration_cache", type=str, help="Name of the calibration cache file")
arg_parser.add_argument("--device", type=int, help="device id")
- arg_parser.add_argument("--truncate", action='store_true', help="Truncate long and double weights in the network in Torch-TensorRT")
- arg_parser.add_argument("--is_trt_engine", action='store_true', help="Boolean flag to determine if the user provided model is a TRT engine or not")
+ arg_parser.add_argument(
+ "--truncate", action="store_true", help="Truncate long and double weights in the network in Torch-TensorRT"
+ )
+ arg_parser.add_argument(
+ "--is_trt_engine",
+ action="store_true",
+ help="Boolean flag to determine if the user provided model is a TRT engine or not",
+ )
arg_parser.add_argument("--report", type=str, help="Path of the output file where performance summary is written.")
args = arg_parser.parse_args()
cudnn.benchmark = True
# Create random input tensor of certain size
@@ -324,59 +359,69 @@
# Load YAML params
params = parser.read_config()
model, model_name, is_trt_engine = load_model(params)
# Default device is set to 0. Configurable using yaml config file.
- torch.cuda.set_device(params.get('runtime').get('device', 0))
-
- num_input = params.get('input').get('num_inputs')
- truncate_long_and_double = params.get('runtime').get('truncate_long_and_double', False)
- batch_size = params.get('input').get('batch_size', 1)
- for precision in params.get('runtime').get('precision', 'fp32'):
+ torch.cuda.set_device(params.get("runtime").get("device", 0))
+
+ num_input = params.get("input").get("num_inputs")
+ truncate_long_and_double = params.get("runtime").get("truncate_long_and_double", False)
+ batch_size = params.get("input").get("batch_size", 1)
+ for precision in params.get("runtime").get("precision", "fp32"):
input_tensors = []
- num_input = params.get('input').get('num_inputs', 1)
+ num_input = params.get("input").get("num_inputs", 1)
for i in range(num_input):
- inp_tensor = params.get('input').get('input' + str(i))
- input_tensors.append(torch.randint(0, 2, tuple(d for d in inp_tensor), dtype=precision_to_dtype(precision)).cuda())
+ inp_tensor = params.get("input").get("input" + str(i))
+ input_tensors.append(
+ torch.randint(0, 2, tuple(d for d in inp_tensor), dtype=precision_to_dtype(precision)).cuda()
+ )
if is_trt_engine:
- print("Warning, TensorRT engine file is configured. Please make sure the precision matches with the TRT engine for reliable results")
+ print(
+ "Warning, TensorRT engine file is configured. Please make sure the precision matches with the TRT engine for reliable results"
+ )
if not is_trt_engine and (precision == "fp16" or precision == "half"):
# If model is TensorRT serialized engine then model.half will report failure
model = model.half()
- backends = params.get('backend')
+ backends = params.get("backend")
# Run inference
- status = run(model, backends, input_tensors, params, precision, truncate_long_and_double, batch_size, is_trt_engine)
+ status = run(
+ model, backends, input_tensors, params, precision, truncate_long_and_double, batch_size, is_trt_engine
+ )
else:
params = vars(args)
- model_name = params['model']
+ model_name = params["model"]
if os.path.exists(model_name):
print("Loading user provided model: ", model_name)
model = torch.jit.load(model_name).cuda().eval()
elif model_name in BENCHMARK_MODELS:
- model = BENCHMARK_MODELS[model_name]['model'].eval().cuda()
+ model = BENCHMARK_MODELS[model_name]["model"].eval().cuda()
else:
- raise ValueError("Invalid model name. Please provide a torchscript model file or model name (among the following options vgg16|resnet50|efficientnet_b0|vit)")
-
- backends = parse_backends(params['backends'])
- truncate_long_and_double = params['truncate']
- batch_size = params['batch_size']
- is_trt_engine = params['is_trt_engine']
- precisions = parse_precisions(params['precision'])
+ raise ValueError(
+ "Invalid model name. Please provide a torchscript model file or model name (among the following options vgg16|resnet50|efficientnet_b0|vit)"
+ )
+
+ backends = parse_backends(params["backends"])
+ truncate_long_and_double = params["truncate"]
+ batch_size = params["batch_size"]
+ is_trt_engine = params["is_trt_engine"]
+ precisions = parse_precisions(params["precision"])
for precision in precisions:
- input_tensors = parse_inputs(params['inputs'], precision_to_dtype(precision))
+ input_tensors = parse_inputs(params["inputs"], precision_to_dtype(precision))
if not is_trt_engine and (precision == "fp16" or precision == "half"):
# If model is TensorRT serialized engine then model.half will report failure
model = model.half()
- status = run(model, backends, input_tensors, params, precision, truncate_long_and_double, batch_size, is_trt_engine)
+ status = run(
+ model, backends, input_tensors, params, precision, truncate_long_and_double, batch_size, is_trt_engine
+ )
# Generate report
- print('Model Summary: ', model_name)
+ print("Model Summary: ", model_name)
summary = pd.DataFrame(results)
print(summary)
- with open(args.report, 'w') as file:
- file.write('Model Summary: ' + model_name + '\n')
+ with open(args.report, "w") as file:
+ file.write("Model Summary: " + model_name + "\n")
file.write(summary.to_string())
file.close()
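As a quick reference for the helpers in tools/perf/utils.py shown above, here is a minimal sketch of how the CLI strings are parsed, assuming the script is run from tools/perf so that utils is importable and a CUDA device is available (the example strings mirror the argparse help text):

# Hypothetical usage of the parsing helpers from the utils.py diff above.
from utils import parse_backends, parse_precisions, parse_inputs, precision_to_dtype

backends = parse_backends("torch,torch_tensorrt,tensorrt")   # -> ["torch", "torch_tensorrt", "tensorrt"]
precisions = parse_precisions("fp32,fp16")                    # -> ["fp32", "fp16"]

# Each "(shape)@dtype" entry becomes a random tensor of that shape and dtype on the GPU;
# multiple inputs are separated by ";", and a trailing @dtype overrides the default dtype passed in.
bert_inputs = parse_inputs("(1, 128)@int32;(1, 128)@int32", precision_to_dtype("fp32"))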
+ "--precision",
+ default="fp32",
+ type=str,
+ help="Comma separated list of precisions to build TensorRT engine Eg: fp32,fp16",
+ )
arg_parser.add_argument("--calibration_cache", type=str, help="Name of the calibration cache file")
arg_parser.add_argument("--device", type=int, help="device id")
- arg_parser.add_argument("--truncate", action='store_true', help="Truncate long and double weights in the network in Torch-TensorRT")
- arg_parser.add_argument("--is_trt_engine", action='store_true', help="Boolean flag to determine if the user provided model is a TRT engine or not")
+ arg_parser.add_argument(
+ "--truncate", action="store_true", help="Truncate long and double weights in the network in Torch-TensorRT"
+ )
+ arg_parser.add_argument(
+ "--is_trt_engine",
+ action="store_true",
+ help="Boolean flag to determine if the user provided model is a TRT engine or not",
+ )
arg_parser.add_argument("--report", type=str, help="Path of the output file where performance summary is written.")
args = arg_parser.parse_args()
cudnn.benchmark = True
# Create random input tensor of certain size
@@ -324,59 +359,69 @@
# Load YAML params
params = parser.read_config()
model, model_name, is_trt_engine = load_model(params)
# Default device is set to 0. Configurable using yaml config file.
- torch.cuda.set_device(params.get('runtime').get('device', 0))
-
- num_input = params.get('input').get('num_inputs')
- truncate_long_and_double = params.get('runtime').get('truncate_long_and_double', False)
- batch_size = params.get('input').get('batch_size', 1)
- for precision in params.get('runtime').get('precision', 'fp32'):
+ torch.cuda.set_device(params.get("runtime").get("device", 0))
+
+ num_input = params.get("input").get("num_inputs")
+ truncate_long_and_double = params.get("runtime").get("truncate_long_and_double", False)
+ batch_size = params.get("input").get("batch_size", 1)
+ for precision in params.get("runtime").get("precision", "fp32"):
input_tensors = []
- num_input = params.get('input').get('num_inputs', 1)
+ num_input = params.get("input").get("num_inputs", 1)
for i in range(num_input):
- inp_tensor = params.get('input').get('input' + str(i))
- input_tensors.append(torch.randint(0, 2, tuple(d for d in inp_tensor), dtype=precision_to_dtype(precision)).cuda())
+ inp_tensor = params.get("input").get("input" + str(i))
+ input_tensors.append(
+ torch.randint(0, 2, tuple(d for d in inp_tensor), dtype=precision_to_dtype(precision)).cuda()
+ )
if is_trt_engine:
- print("Warning, TensorRT engine file is configured. Please make sure the precision matches with the TRT engine for reliable results")
+ print(
+ "Warning, TensorRT engine file is configured. Please make sure the precision matches with the TRT engine for reliable results"
+ )
if not is_trt_engine and (precision == "fp16" or precision == "half"):
# If model is TensorRT serialized engine then model.half will report failure
model = model.half()
- backends = params.get('backend')
+ backends = params.get("backend")
# Run inference
- status = run(model, backends, input_tensors, params, precision, truncate_long_and_double, batch_size, is_trt_engine)
+ status = run(
+ model, backends, input_tensors, params, precision, truncate_long_and_double, batch_size, is_trt_engine
+ )
else:
params = vars(args)
- model_name = params['model']
+ model_name = params["model"]
if os.path.exists(model_name):
print("Loading user provided model: ", model_name)
model = torch.jit.load(model_name).cuda().eval()
elif model_name in BENCHMARK_MODELS:
- model = BENCHMARK_MODELS[model_name]['model'].eval().cuda()
+ model = BENCHMARK_MODELS[model_name]["model"].eval().cuda()
else:
- raise ValueError("Invalid model name. Please provide a torchscript model file or model name (among the following options vgg16|resnet50|efficientnet_b0|vit)")
-
- backends = parse_backends(params['backends'])
- truncate_long_and_double = params['truncate']
- batch_size = params['batch_size']
- is_trt_engine = params['is_trt_engine']
- precisions = parse_precisions(params['precision'])
+ raise ValueError(
+ "Invalid model name. Please provide a torchscript model file or model name (among the following options vgg16|resnet50|efficientnet_b0|vit)"
+ )
+
+ backends = parse_backends(params["backends"])
+ truncate_long_and_double = params["truncate"]
+ batch_size = params["batch_size"]
+ is_trt_engine = params["is_trt_engine"]
+ precisions = parse_precisions(params["precision"])
for precision in precisions:
- input_tensors = parse_inputs(params['inputs'], precision_to_dtype(precision))
+ input_tensors = parse_inputs(params["inputs"], precision_to_dtype(precision))
if not is_trt_engine and (precision == "fp16" or precision == "half"):
# If model is TensorRT serialized engine then model.half will report failure
model = model.half()
- status = run(model, backends, input_tensors, params, precision, truncate_long_and_double, batch_size, is_trt_engine)
+ status = run(
+ model, backends, input_tensors, params, precision, truncate_long_and_double, batch_size, is_trt_engine
+ )
# Generate report
- print('Model Summary: ', model_name)
+ print("Model Summary: ", model_name)
summary = pd.DataFrame(results)
print(summary)
- with open(args.report, 'w') as file:
- file.write('Model Summary: ' + model_name + '\n')
+ with open(args.report, "w") as file:
+ file.write("Model Summary: " + model_name + "\n")
file.write(summary.to_string())
file.close()
Signed-off-by: Dheeraj Peri <peri.dheeraj@gmail.com>
There are some changes that do not conform to Python style guidelines:
--- tools/perf/custom_models.py 2022-08-11 18:29:37.537228 +0000
+++ tools/perf/custom_models.py 2022-08-11 18:32:55.129614 +0000
@@ -1,9 +1,10 @@
import torch
import torch.nn as nn
from transformers import BertModel, BertTokenizer, BertConfig
import torch.nn.functional as F
+
def BertModule():
model_name = "bert-base-uncased"
enc = BertTokenizer.from_pretrained(model_name)
text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
--- tools/perf/hub.py 2022-08-11 18:29:37.537228 +0000
+++ tools/perf/hub.py 2022-08-11 18:32:55.208488 +0000
@@ -15,40 +15,25 @@
# Detect case of no GPU before deserialization of models on GPU
if not torch.cuda.is_available():
raise Exception("No GPU found. Please check if installed torch version is compatible with CUDA version")
# Downloads all model files again if manifest file is not present
-MANIFEST_FILE = 'model_manifest.json'
+MANIFEST_FILE = "model_manifest.json"
BENCHMARK_MODELS = {
- "vgg16": {
- "model": models.vgg16(weights=None),
- "path": "script"
- },
- "resnet50": {
- "model": models.resnet50(weights=None),
- "path": "script"
- },
- "efficientnet_b0": {
- "model": timm.create_model('efficientnet_b0', pretrained=True),
- "path": "script"
- },
- "vit": {
- "model": timm.create_model('vit_base_patch16_224', pretrained=True),
- "path": "script"
- },
- "bert_base_uncased": {
- "model": cm.BertModule(),
- "path": "trace"
- },
+ "vgg16": {"model": models.vgg16(weights=None), "path": "script"},
+ "resnet50": {"model": models.resnet50(weights=None), "path": "script"},
+ "efficientnet_b0": {"model": timm.create_model("efficientnet_b0", pretrained=True), "path": "script"},
+ "vit": {"model": timm.create_model("vit_base_patch16_224", pretrained=True), "path": "script"},
+ "bert_base_uncased": {"model": cm.BertModule(), "path": "trace"},
}
def get(n, m, manifest):
print("Downloading {}".format(n))
- traced_filename = "models/" + n + '_traced.jit.pt'
- script_filename = "models/" + n + '_scripted.jit.pt'
+ traced_filename = "models/" + n + "_traced.jit.pt"
+ script_filename = "models/" + n + "_scripted.jit.pt"
x = torch.ones((1, 3, 300, 300)).cuda()
if n == "bert-base-uncased":
traced_model = m["model"]
torch.jit.save(traced_model, traced_filename)
manifest.update({n: [traced_filename]})
@@ -78,13 +63,15 @@
else:
for n, m in BENCHMARK_MODELS.items():
scripted_filename = "models/" + n + "_scripted.jit.pt"
traced_filename = "models/" + n + "_traced.jit.pt"
# Check if model file exists on disk
- if (m["path"] == "both" and os.path.exists(scripted_filename) and os.path.exists(traced_filename)) or \
- (m["path"] == "script" and os.path.exists(scripted_filename)) or \
- (m["path"] == "trace" and os.path.exists(traced_filename)):
+ if (
+ (m["path"] == "both" and os.path.exists(scripted_filename) and os.path.exists(traced_filename))
+ or (m["path"] == "script" and os.path.exists(scripted_filename))
+ or (m["path"] == "trace" and os.path.exists(traced_filename))
+ ):
print("Skipping {} ".format(n))
continue
manifest = get(n, m, manifest)
@@ -96,31 +83,35 @@
# Check if Manifest file exists or is empty
if not os.path.exists(MANIFEST_FILE) or os.stat(MANIFEST_FILE).st_size == 0:
manifest = {"version": torch_version}
# Creating an empty manifest file for overwriting post setup
- os.system('touch {}'.format(MANIFEST_FILE))
+ os.system("touch {}".format(MANIFEST_FILE))
else:
manifest_exists = True
# Load manifest if already exists
- with open(MANIFEST_FILE, 'r') as f:
+ with open(MANIFEST_FILE, "r") as f:
manifest = json.load(f)
- if manifest['version'] == torch_version:
+ if manifest["version"] == torch_version:
version_matches = True
else:
- print("Torch version: {} mismatches \
+ print(
+ "Torch version: {} mismatches \
with manifest's version: {}. Re-downloading \
- all models".format(torch_version, manifest['version']))
+ all models".format(
+ torch_version, manifest["version"]
+ )
+ )
# Overwrite the manifest version as current torch version
- manifest['version'] = torch_version
+ manifest["version"] = torch_version
download_models(version_matches, manifest)
# Write updated manifest file to disk
- with open(MANIFEST_FILE, 'r+') as f:
+ with open(MANIFEST_FILE, "r+") as f:
data = f.read()
f.seek(0)
record = json.dumps(manifest)
f.write(record)
f.truncate()
--- tools/perf/utils.py 2022-08-11 18:29:37.537228 +0000
+++ tools/perf/utils.py 2022-08-11 18:32:55.239338 +0000
@@ -3,57 +3,46 @@
import custom_models as cm
import torchvision.models as models
import timm
BENCHMARK_MODELS = {
- "vgg16": {
- "model": models.vgg16(pretrained=True),
- "path": "script"
- },
- "resnet50": {
- "model": torch.hub.load('pytorch/vision:v0.9.0', 'resnet50', pretrained=True),
- "path": "script"
- },
- "efficientnet_b0": {
- "model": timm.create_model('efficientnet_b0', pretrained=True),
- "path": "script"
- },
- "vit": {
- "model": timm.create_model('vit_base_patch16_224', pretrained=True),
- "path": "script"
- },
- "bert_base_uncased": {
- "model": cm.BertModule(),
- "path": "trace"
- },
+ "vgg16": {"model": models.vgg16(pretrained=True), "path": "script"},
+ "resnet50": {"model": torch.hub.load("pytorch/vision:v0.9.0", "resnet50", pretrained=True), "path": "script"},
+ "efficientnet_b0": {"model": timm.create_model("efficientnet_b0", pretrained=True), "path": "script"},
+ "vit": {"model": timm.create_model("vit_base_patch16_224", pretrained=True), "path": "script"},
+ "bert_base_uncased": {"model": cm.BertModule(), "path": "trace"},
}
+
def precision_to_dtype(pr):
- if pr == 'fp32':
+ if pr == "fp32":
return torch.float
- elif pr == 'fp16' or pr == 'half':
+ elif pr == "fp16" or pr == "half":
return torch.half
- elif pr == 'int32':
+ elif pr == "int32":
return torch.int32
- elif pr == 'bool':
+ elif pr == "bool":
return torch.bool
else:
return torch.float32
+
def parse_inputs(user_inputs, dtype):
- parsed_inputs = user_inputs.split(';')
+ parsed_inputs = user_inputs.split(";")
torchtrt_inputs = []
for input in parsed_inputs:
input_shape = []
- input_shape_and_dtype = input.split('@')
+ input_shape_and_dtype = input.split("@")
dtype = precision_to_dtype(input_shape_and_dtype[1]) if len(input_shape_and_dtype) == 2 else dtype
- for input_dim in input_shape_and_dtype[0][1:-1].split(','):
+ for input_dim in input_shape_and_dtype[0][1:-1].split(","):
input_shape.append(int(input_dim))
torchtrt_inputs.append(torch.randint(0, 5, input_shape, dtype=dtype).cuda())
return torchtrt_inputs
+
def parse_backends(backends):
- return backends.split(',')
+ return backends.split(",")
+
def parse_precisions(precisions):
- return precisions.split(',')
+ return precisions.split(",")
--- tools/perf/perf_run.py 2022-08-11 18:29:37.537228 +0000
+++ tools/perf/perf_run.py 2022-08-11 18:32:55.330922 +0000
@@ -42,18 +42,21 @@
# Retrieves the value from the configuration else uses default values
def get(self, key, default_value=None):
if not key in self.params:
if not default_value:
- raise ValueError('Key {} is not present and default_value is not configured. Please run it with default value', key)
+ raise ValueError(
+ "Key {} is not present and default_value is not configured. Please run it with default value", key
+ )
self.params[key] = default_value
return self.params[key]
+
# Runs inference using Torch backend
def run_torch(model, input_tensors, params, precision, batch_size):
print("Running Torch for precision: ", precision, " batch_size : ", batch_size)
- iters = params.get('iterations', 20)
+ iters = params.get("iterations", 20)
# Warm up
with torch.no_grad():
for _ in range(WARMUP_ITER):
features = model(*input_tensors)
@@ -69,29 +72,30 @@
end_time = timeit.default_timer()
meas_time = end_time - start_time
timings.append(meas_time)
recordStats("Torch", timings, precision, batch_size)
+
# Runs inference using Torch-TensorRT backend
def run_torch_tensorrt(model, input_tensors, params, precision, truncate_long_and_double, batch_size):
print("Running Torch-TensorRT for precision: ", precision, " batch_size : ", batch_size)
# Compiling Torch-TensorRT model
compile_settings = {
- "inputs": input_tensors,
- "enabled_precisions": {precision_to_dtype(precision)} ,
- "truncate_long_and_double": truncate_long_and_double,
- "min_block_size" : 1,
+ "inputs": input_tensors,
+ "enabled_precisions": {precision_to_dtype(precision)},
+ "truncate_long_and_double": truncate_long_and_double,
+ "min_block_size": 1,
}
- if precision == 'int8':
- compile_settings.update({"calib": params.get('calibration_cache')})
+ if precision == "int8":
+ compile_settings.update({"calib": params.get("calibration_cache")})
with torchtrt.logging.errors():
model = torchtrt.compile(model, **compile_settings)
- iters = params.get('iterations', 20)
+ iters = params.get("iterations", 20)
# Warm up
with torch.no_grad():
for _ in range(WARMUP_ITER):
features = model(*input_tensors)
@@ -106,10 +110,11 @@
end_time = timeit.default_timer()
meas_time = end_time - start_time
timings.append(meas_time)
recordStats("Torch-TensorRT", timings, precision, batch_size)
+
# Runs inference using FX2TRT backend
def run_fx2trt(model, input_tensors, params, precision, batch_size):
print("Running FX2TRT for precision: ", precision, " batch_size : ", batch_size)
if precision == "fp32":
@@ -125,11 +130,11 @@
max_batch_size=batch_size,
lower_precision=precision,
verbose_log=False,
)
- iters = params.get('iterations', 20)
+ iters = params.get("iterations", 20)
# Warm up
with torch.no_grad():
for _ in range(WARMUP_ITER):
features = model(*input_tensors)
@@ -144,10 +149,11 @@
end_time = timeit.default_timer()
meas_time = end_time - start_time
timings.append(meas_time)
recordStats("FX-TensorRT", timings, precision, batch_size)
+
def torch_dtype_from_trt(dtype):
if dtype == trt.int8:
return torch.int8
elif dtype == trt.bool:
@@ -159,20 +165,23 @@
elif dtype == trt.float32:
return torch.float32
else:
raise TypeError("%s is not supported by torch" % dtype)
+
def torch_device_from_trt(device):
if device == trt.TensorLocation.DEVICE:
return torch.device("cuda")
elif device == trt.TensorLocation.HOST:
return torch.device("cpu")
else:
return TypeError("%s is not supported by torch" % device)
-def run_tensorrt(model, input_tensors, params, precision, truncate_long_and_double=False, is_trt_engine=False, batch_size=1):
+def run_tensorrt(
+ model, input_tensors, params, precision, truncate_long_and_double=False, is_trt_engine=False, batch_size=1
+):
engine = None
# If the model file is a TensorRT engine then directly deserialize and run inference
# else convert the torch module to a TensorRT engine first and then run inference
if not is_trt_engine:
@@ -189,16 +198,16 @@
# Deserialize the TensorRT engine
with trt.Logger() as logger, trt.Runtime(logger) as runtime:
engine = runtime.deserialize_cuda_engine(model)
print("Running TensorRT for precision: ", precision, " batch_size : ", batch_size)
- iters = params.get('iterations', 20)
+ iters = params.get("iterations", 20)
# Compiling the bindings
bindings = engine.num_bindings * [None]
k = 0
- for idx,_ in enumerate(bindings):
+ for idx, _ in enumerate(bindings):
dtype = torch_dtype_from_trt(engine.get_binding_dtype(idx))
shape = tuple(engine.get_binding_shape(idx))
device = torch_device_from_trt(engine.get_location(idx))
if not engine.binding_is_input(idx):
# Output bindings
@@ -223,23 +232,26 @@
meas_time = end_time - start_time
timings.append(meas_time)
recordStats("TensorRT", timings, precision, batch_size)
+
# Deploys inference run for different backend configurations
-def run(model, backends, input_tensors, params, precision, truncate_long_and_double=False, batch_size=1, is_trt_engine=False):
+def run(
+ model, backends, input_tensors, params, precision, truncate_long_and_double=False, batch_size=1, is_trt_engine=False
+):
for backend in backends:
- if precision == 'int8':
- if backend == 'all' or backend == 'torch':
+ if precision == "int8":
+ if backend == "all" or backend == "torch":
print("int8 precision is not supported for torch runtime in this script yet")
return False
- if backend == 'all' or backend == 'torch_tensorrt' or params.get('calibration_cache', None) == None:
+ if backend == "all" or backend == "torch_tensorrt" or params.get("calibration_cache", None) == None:
print("int8 precision expects calibration cache file for inference")
return False
- if backend == 'all':
+ if backend == "all":
run_torch(model, input_tensors, params, precision, batch_size)
run_torch_tensorrt(model, input_tensors, params, precision, truncate_long_and_double, batch_size)
run_tensorrt(model, input_tensors, params, precision, truncate_long_and_double, is_trt_engine, batch_size)
elif backend == "torch":
@@ -252,12 +264,13 @@
run_fx2trt(model, input_tensors, params, precision, batch_size)
elif backend == "tensorrt":
run_tensorrt(model, input_tensors, params, precision, truncate_long_and_double, is_trt_engine, batch_size)
+
# Generate report
-def recordStats(backend, timings, precision, batch_size = 1):
+def recordStats(backend, timings, precision, batch_size=1):
times = np.array(timings)
steps = len(times)
speeds = batch_size / times
time_mean = np.mean(times)
time_med = np.median(times)
@@ -265,55 +278,77 @@
time_std = np.std(times, ddof=0)
speed_mean = np.mean(speeds)
speed_med = np.median(speeds)
stats = {
- 'Backend' : backend,
- 'Precision' : precision,
- 'Batch size' : batch_size,
- 'Median(FPS)' : speed_med,
- 'Mean(FPS)' : speed_mean,
- 'Median-Latency(ms)' : time_med,
- 'Mean-Latency(ms)' : time_mean,
+ "Backend": backend,
+ "Precision": precision,
+ "Batch size": batch_size,
+ "Median(FPS)": speed_med,
+ "Mean(FPS)": speed_mean,
+ "Median-Latency(ms)": time_med,
+ "Mean-Latency(ms)": time_mean,
}
results.append(stats)
+
def load_model(params):
model = None
is_trt_engine = False
# Load torch model traced/scripted
- model_file = params.get('model').get('filename')
- try :
- model_name = params.get('model').get('name')
+ model_file = params.get("model").get("filename")
+ try:
+ model_name = params.get("model").get("name")
except:
model_name = model_file
print("Loading model: ", model_file)
- if model_file.endswith('.plan'):
+ if model_file.endswith(".plan"):
is_trt_engine = True
# Read the TensorRT engine file
- with open(model_file, 'rb') as fin:
+ with open(model_file, "rb") as fin:
model = fin.read()
else:
model = torch.jit.load(model_file).cuda()
return model, model_name, is_trt_engine
-if __name__ == '__main__':
+if __name__ == "__main__":
arg_parser = argparse.ArgumentParser(description="Run inference on a model with random input values")
- arg_parser.add_argument("--config", type=str, help="Load YAML based configuration file to run the inference. If this is used other params will be ignored")
+ arg_parser.add_argument(
+ "--config",
+ type=str,
+ help="Load YAML based configuration file to run the inference. If this is used other params will be ignored",
+ )
# The following options are manual user provided settings
- arg_parser.add_argument("--backends", type=str, help="Comma separated string of backends. Eg: torch,torch_tensorrt,fx2trt,tensorrt")
+ arg_parser.add_argument(
+ "--backends", type=str, help="Comma separated string of backends. Eg: torch,torch_tensorrt,fx2trt,tensorrt"
+ )
arg_parser.add_argument("--model", type=str, help="Name of the model file")
- arg_parser.add_argument("--inputs", type=str, help="List of input shapes. Eg: (1, 3, 224, 224)@fp32 for Resnet or (1, 128)@int32;(1, 128)@int32 for BERT")
+ arg_parser.add_argument(
+ "--inputs",
+ type=str,
+ help="List of input shapes. Eg: (1, 3, 224, 224)@fp32 for Resnet or (1, 128)@int32;(1, 128)@int32 for BERT",
+ )
arg_parser.add_argument("--batch_size", type=int, default=1, help="Batch size to build and run")
- arg_parser.add_argument("--precision", default="fp32", type=str, help="Comma separated list of precisions to build TensorRT engine Eg: fp32,fp16")
+ arg_parser.add_argument(
+ "--precision",
+ default="fp32",
+ type=str,
+ help="Comma separated list of precisions to build TensorRT engine Eg: fp32,fp16",
+ )
arg_parser.add_argument("--calibration_cache", type=str, help="Name of the calibration cache file")
arg_parser.add_argument("--device", type=int, help="device id")
- arg_parser.add_argument("--truncate", action='store_true', help="Truncate long and double weights in the network in Torch-TensorRT")
- arg_parser.add_argument("--is_trt_engine", action='store_true', help="Boolean flag to determine if the user provided model is a TRT engine or not")
+ arg_parser.add_argument(
+ "--truncate", action="store_true", help="Truncate long and double weights in the network in Torch-TensorRT"
+ )
+ arg_parser.add_argument(
+ "--is_trt_engine",
+ action="store_true",
+ help="Boolean flag to determine if the user provided model is a TRT engine or not",
+ )
arg_parser.add_argument("--report", type=str, help="Path of the output file where performance summary is written.")
args = arg_parser.parse_args()
cudnn.benchmark = True
# Create random input tensor of certain size
@@ -324,59 +359,69 @@
# Load YAML params
params = parser.read_config()
model, model_name, is_trt_engine = load_model(params)
# Default device is set to 0. Configurable using yaml config file.
- torch.cuda.set_device(params.get('runtime').get('device', 0))
-
- num_input = params.get('input').get('num_inputs')
- truncate_long_and_double = params.get('runtime').get('truncate_long_and_double', False)
- batch_size = params.get('input').get('batch_size', 1)
- for precision in params.get('runtime').get('precision', 'fp32'):
+ torch.cuda.set_device(params.get("runtime").get("device", 0))
+
+ num_input = params.get("input").get("num_inputs")
+ truncate_long_and_double = params.get("runtime").get("truncate_long_and_double", False)
+ batch_size = params.get("input").get("batch_size", 1)
+ for precision in params.get("runtime").get("precision", "fp32"):
input_tensors = []
- num_input = params.get('input').get('num_inputs', 1)
+ num_input = params.get("input").get("num_inputs", 1)
for i in range(num_input):
- inp_tensor = params.get('input').get('input' + str(i))
- input_tensors.append(torch.randint(0, 2, tuple(d for d in inp_tensor), dtype=precision_to_dtype(precision)).cuda())
+ inp_tensor = params.get("input").get("input" + str(i))
+ input_tensors.append(
+ torch.randint(0, 2, tuple(d for d in inp_tensor), dtype=precision_to_dtype(precision)).cuda()
+ )
if is_trt_engine:
- print("Warning, TensorRT engine file is configured. Please make sure the precision matches with the TRT engine for reliable results")
+ print(
+ "Warning, TensorRT engine file is configured. Please make sure the precision matches with the TRT engine for reliable results"
+ )
if not is_trt_engine and (precision == "fp16" or precision == "half"):
# If model is TensorRT serialized engine then model.half will report failure
model = model.half()
- backends = params.get('backend')
+ backends = params.get("backend")
# Run inference
- status = run(model, backends, input_tensors, params, precision, truncate_long_and_double, batch_size, is_trt_engine)
+ status = run(
+ model, backends, input_tensors, params, precision, truncate_long_and_double, batch_size, is_trt_engine
+ )
else:
params = vars(args)
- model_name = params['model']
+ model_name = params["model"]
if os.path.exists(model_name):
print("Loading user provided model: ", model_name)
model = torch.jit.load(model_name).cuda().eval()
elif model_name in BENCHMARK_MODELS:
- model = BENCHMARK_MODELS[model_name]['model'].eval().cuda()
+ model = BENCHMARK_MODELS[model_name]["model"].eval().cuda()
else:
- raise ValueError("Invalid model name. Please provide a torchscript model file or model name (among the following options vgg16|resnet50|efficientnet_b0|vit)")
-
- backends = parse_backends(params['backends'])
- truncate_long_and_double = params['truncate']
- batch_size = params['batch_size']
- is_trt_engine = params['is_trt_engine']
- precisions = parse_precisions(params['precision'])
+ raise ValueError(
+ "Invalid model name. Please provide a torchscript model file or model name (among the following options vgg16|resnet50|efficientnet_b0|vit)"
+ )
+
+ backends = parse_backends(params["backends"])
+ truncate_long_and_double = params["truncate"]
+ batch_size = params["batch_size"]
+ is_trt_engine = params["is_trt_engine"]
+ precisions = parse_precisions(params["precision"])
for precision in precisions:
- input_tensors = parse_inputs(params['inputs'], precision_to_dtype(precision))
+ input_tensors = parse_inputs(params["inputs"], precision_to_dtype(precision))
if not is_trt_engine and (precision == "fp16" or precision == "half"):
# If model is TensorRT serialized engine then model.half will report failure
model = model.half()
- status = run(model, backends, input_tensors, params, precision, truncate_long_and_double, batch_size, is_trt_engine)
+ status = run(
+ model, backends, input_tensors, params, precision, truncate_long_and_double, batch_size, is_trt_engine
+ )
# Generate report
- print('Model Summary: ', model_name)
+ print("Model Summary: ", model_name)
summary = pd.DataFrame(results)
print(summary)
- with open(args.report, 'w') as file:
- file.write('Model Summary: ' + model_name + '\n')
+ with open(args.report, "w") as file:
+ file.write("Model Summary: " + model_name + "\n")
file.write(summary.to_string())
file.close()
Code conforms to C++ style guidelines
I think this branch needs to be rebased; it seems like the pre-commit hooks were not run.
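For reference, a way to apply the repository's hooks locally before pushing is sketched below; this assumes the standard pre-commit tooling is configured here and that upstream/master is the rebase target (both are assumptions, not taken from this PR):

# Assumed workflow; remote and branch names are placeholders
pip install pre-commit
pre-commit run --all-files
git fetch upstream
git rebase upstream/master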
Signed-off-by: Dheeraj Peri <peri.dheeraj@gmail.com>
Code conforms to Python style guidelines
There are some changes that do not conform to C++ style guidelines:
diff --git a/workspace/core/conversion/converters/impl/element_wise.cpp b/tmp/changes.txt
old mode 100755
new mode 100644
ERROR: Some files do not conform to style guidelines
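The only change flagged here is a file-mode switch (from 100755 to 100644) on element_wise.cpp. A possible fix, assuming the check expects the non-executable 100644 mode, is sketched below; the path is taken from the diff header above:

# Sketch of a possible fix: clear the executable bit so git records mode 100644 again
chmod -x core/conversion/converters/impl/element_wise.cpp
git add core/conversion/converters/impl/element_wise.cpp
git commit -s -m "chore: restore non-executable file mode"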
Signed-off-by: Dheeraj Peri <peri.dheeraj@gmail.com>
Signed-off-by: Dheeraj Peri <peri.dheeraj@gmail.com>
Signed-off-by: Dheeraj Peri <peri.dheeraj@gmail.com>
Signed-off-by: Dheeraj Peri <peri.dheeraj@gmail.com>
Code conforms to C++ style guidelines
There are some changes that do not conform to Python style guidelines:
--- tools/perf/hub.py 2022-09-08 17:45:26.704885 +0000
+++ tools/perf/hub.py 2022-09-08 17:46:00.411687 +0000
@@ -12,20 +12,28 @@
torch_version = torch.__version__
# Detect case of no GPU before deserialization of models on GPU
if not torch.cuda.is_available():
- raise Exception("No GPU found. Please check if installed torch version is compatible with CUDA version")
+ raise Exception(
+ "No GPU found. Please check if installed torch version is compatible with CUDA version"
+ )
# Downloads all model files again if manifest file is not present
MANIFEST_FILE = "model_manifest.json"
BENCHMARK_MODELS = {
"vgg16": {"model": models.vgg16(weights=None), "path": "script"},
"resnet50": {"model": models.resnet50(weights=None), "path": "script"},
- "efficientnet_b0": {"model": timm.create_model("efficientnet_b0", pretrained=True), "path": "script"},
- "vit": {"model": timm.create_model("vit_base_patch16_224", pretrained=True), "path": "script"},
+ "efficientnet_b0": {
+ "model": timm.create_model("efficientnet_b0", pretrained=True),
+ "path": "script",
+ },
+ "vit": {
+ "model": timm.create_model("vit_base_patch16_224", pretrained=True),
+ "path": "script",
+ },
"bert_base_uncased": {"model": cm.BertModule(), "path": "trace"},
}
def get(n, m, manifest):
@@ -64,11 +72,15 @@
for n, m in BENCHMARK_MODELS.items():
scripted_filename = "models/" + n + "_scripted.jit.pt"
traced_filename = "models/" + n + "_traced.jit.pt"
# Check if model file exists on disk
if (
- (m["path"] == "both" and os.path.exists(scripted_filename) and os.path.exists(traced_filename))
+ (
+ m["path"] == "both"
+ and os.path.exists(scripted_filename)
+ and os.path.exists(traced_filename)
+ )
or (m["path"] == "script" and os.path.exists(scripted_filename))
or (m["path"] == "trace" and os.path.exists(traced_filename))
):
print("Skipping {} ".format(n))
continue
--- tools/perf/utils.py 2022-09-08 17:45:26.704885 +0000
+++ tools/perf/utils.py 2022-09-08 17:46:00.464733 +0000
@@ -4,13 +4,22 @@
import torchvision.models as models
import timm
BENCHMARK_MODELS = {
"vgg16": {"model": models.vgg16(pretrained=True), "path": "script"},
- "resnet50": {"model": torch.hub.load("pytorch/vision:v0.9.0", "resnet50", pretrained=True), "path": "script"},
- "efficientnet_b0": {"model": timm.create_model("efficientnet_b0", pretrained=True), "path": "script"},
- "vit": {"model": timm.create_model("vit_base_patch16_224", pretrained=True), "path": "script"},
+ "resnet50": {
+ "model": torch.hub.load("pytorch/vision:v0.9.0", "resnet50", pretrained=True),
+ "path": "script",
+ },
+ "efficientnet_b0": {
+ "model": timm.create_model("efficientnet_b0", pretrained=True),
+ "path": "script",
+ },
+ "vit": {
+ "model": timm.create_model("vit_base_patch16_224", pretrained=True),
+ "path": "script",
+ },
"bert_base_uncased": {"model": cm.BertModule(), "path": "trace"},
}
def precision_to_dtype(pr):
@@ -30,11 +39,15 @@
parsed_inputs = user_inputs.split(";")
torchtrt_inputs = []
for input in parsed_inputs:
input_shape = []
input_shape_and_dtype = input.split("@")
- dtype = precision_to_dtype(input_shape_and_dtype[1]) if len(input_shape_and_dtype) == 2 else dtype
+ dtype = (
+ precision_to_dtype(input_shape_and_dtype[1])
+ if len(input_shape_and_dtype) == 2
+ else dtype
+ )
for input_dim in input_shape_and_dtype[0][1:-1].split(","):
input_shape.append(int(input_dim))
torchtrt_inputs.append(torch.randint(0, 5, input_shape, dtype=dtype).cuda())
return torchtrt_inputs
--- tools/perf/perf_run.py 2022-09-08 17:45:26.704885 +0000
+++ tools/perf/perf_run.py 2022-09-08 17:46:00.602583 +0000
@@ -17,11 +17,17 @@
import torch_tensorrt as torchtrt
from torch_tensorrt.fx.lower import compile
from torch_tensorrt.fx.utils import LowerPrecision
import tensorrt as trt
-from utils import parse_inputs, parse_backends, precision_to_dtype, parse_precisions, BENCHMARK_MODELS
+from utils import (
+ parse_inputs,
+ parse_backends,
+ precision_to_dtype,
+ parse_precisions,
+ BENCHMARK_MODELS,
+)
WARMUP_ITER = 10
results = []
# YAML Parser class for parsing the run configurations
@@ -43,11 +49,12 @@
# Retrieves the value from the configuration else uses default values
def get(self, key, default_value=None):
if not key in self.params:
if not default_value:
raise ValueError(
- "Key {} is not present and default_value is not configured. Please run it with default value", key
+ "Key {} is not present and default_value is not configured. Please run it with default value",
+ key,
)
self.params[key] = default_value
return self.params[key]
@@ -75,12 +82,19 @@
recordStats("Torch", timings, precision, batch_size)
# Runs inference using Torch-TensorRT backend
-def run_torch_tensorrt(model, input_tensors, params, precision, truncate_long_and_double, batch_size):
- print("Running Torch-TensorRT for precision: ", precision, " batch_size : ", batch_size)
+def run_torch_tensorrt(
+ model, input_tensors, params, precision, truncate_long_and_double, batch_size
+):
+ print(
+ "Running Torch-TensorRT for precision: ",
+ precision,
+ " batch_size : ",
+ batch_size,
+ )
# Compiling Torch-TensorRT model
compile_settings = {
"inputs": input_tensors,
"enabled_precisions": {precision_to_dtype(precision)},
"truncate_long_and_double": truncate_long_and_double,
@@ -174,11 +188,17 @@
else:
return TypeError("%s is not supported by torch" % device)
def run_tensorrt(
- model, input_tensors, params, precision, truncate_long_and_double=False, is_trt_engine=False, batch_size=1
+ model,
+ input_tensors,
+ params,
+ precision,
+ truncate_long_and_double=False,
+ is_trt_engine=False,
+ batch_size=1,
):
engine = None
# If the model file is a TensorRT engine then directly deserialize and run inference
# else convert the torch module to a TensorRT engine first and then run inference
@@ -235,11 +255,18 @@
recordStats("TensorRT", timings, precision, batch_size)
# Deploys inference run for different backend configurations
def run(
- model, backends, input_tensors, params, precision, truncate_long_and_double=False, batch_size=1, is_trt_engine=False
+ model,
+ backends,
+ input_tensors,
+ params,
+ precision,
+ truncate_long_and_double=False,
+ batch_size=1,
+ is_trt_engine=False,
):
for backend in backends:
if precision == "int8":
if backend == "all" or backend == "torch":
print(
@@ -255,24 +282,54 @@
print("int8 precision expects calibration cache file for inference")
return False
if backend == "all":
run_torch(model, input_tensors, params, precision, batch_size)
- run_torch_tensorrt(model, input_tensors, params, precision, truncate_long_and_double, batch_size)
- run_tensorrt(model, input_tensors, params, precision, truncate_long_and_double, is_trt_engine, batch_size)
+ run_torch_tensorrt(
+ model,
+ input_tensors,
+ params,
+ precision,
+ truncate_long_and_double,
+ batch_size,
+ )
+ run_tensorrt(
+ model,
+ input_tensors,
+ params,
+ precision,
+ truncate_long_and_double,
+ is_trt_engine,
+ batch_size,
+ )
elif backend == "torch":
run_torch(model, input_tensors, params, precision, batch_size)
elif backend == "torch_tensorrt":
- run_torch_tensorrt(model, input_tensors, params, precision, truncate_long_and_double, batch_size)
+ run_torch_tensorrt(
+ model,
+ input_tensors,
+ params,
+ precision,
+ truncate_long_and_double,
+ batch_size,
+ )
elif backend == "fx2trt":
run_fx2trt(model, input_tensors, params, precision, batch_size)
elif backend == "tensorrt":
- run_tensorrt(model, input_tensors, params, precision, truncate_long_and_double, is_trt_engine, batch_size)
+ run_tensorrt(
+ model,
+ input_tensors,
+ params,
+ precision,
+ truncate_long_and_double,
+ is_trt_engine,
+ batch_size,
+ )
# Generate report
def recordStats(backend, timings, precision, batch_size=1):
times = np.array(timings)
@@ -289,12 +346,12 @@
"Backend": backend,
"Precision": precision,
"Batch size": batch_size,
"Median(FPS)": speed_med,
"Mean(FPS)": speed_mean,
- "Median-Latency(ms)": time_med*1000,
- "Mean-Latency(ms)": time_mean*1000,
+ "Median-Latency(ms)": time_med * 1000,
+ "Mean-Latency(ms)": time_mean * 1000,
}
results.append(stats)
def load_model(params):
@@ -328,36 +385,48 @@
type=str,
help="Load YAML based configuration file to run the inference. If this is used other params will be ignored",
)
# The following options are manual user provided settings
arg_parser.add_argument(
- "--backends", type=str, help="Comma separated string of backends. Eg: torch,torch_tensorrt,fx2trt,tensorrt"
+ "--backends",
+ type=str,
+ help="Comma separated string of backends. Eg: torch,torch_tensorrt,fx2trt,tensorrt",
)
arg_parser.add_argument("--model", type=str, help="Name of the model file")
arg_parser.add_argument(
"--inputs",
type=str,
help="List of input shapes. Eg: (1, 3, 224, 224)@fp32 for Resnet or (1, 128)@int32;(1, 128)@int32 for BERT",
)
- arg_parser.add_argument("--batch_size", type=int, default=1, help="Batch size to build and run")
+ arg_parser.add_argument(
+ "--batch_size", type=int, default=1, help="Batch size to build and run"
+ )
arg_parser.add_argument(
"--precision",
default="fp32",
type=str,
help="Comma separated list of precisions to build TensorRT engine Eg: fp32,fp16",
)
- arg_parser.add_argument("--calibration_cache", type=str, help="Name of the calibration cache file")
+ arg_parser.add_argument(
+ "--calibration_cache", type=str, help="Name of the calibration cache file"
+ )
arg_parser.add_argument("--device", type=int, help="device id")
arg_parser.add_argument(
- "--truncate", action="store_true", help="Truncate long and double weights in the network in Torch-TensorRT"
+ "--truncate",
+ action="store_true",
+ help="Truncate long and double weights in the network in Torch-TensorRT",
)
arg_parser.add_argument(
"--is_trt_engine",
action="store_true",
help="Boolean flag to determine if the user provided model is a TRT engine or not",
)
- arg_parser.add_argument("--report", type=str, help="Path of the output file where performance summary is written.")
+ arg_parser.add_argument(
+ "--report",
+ type=str,
+ help="Path of the output file where performance summary is written.",
+ )
args = arg_parser.parse_args()
cudnn.benchmark = True
# Create random input tensor of certain size
torch.manual_seed(12345)
@@ -370,19 +439,26 @@
# Default device is set to 0. Configurable using yaml config file.
torch.cuda.set_device(params.get("runtime").get("device", 0))
num_input = params.get("input").get("num_inputs")
- truncate_long_and_double = params.get("runtime").get("truncate_long_and_double", False)
+ truncate_long_and_double = params.get("runtime").get(
+ "truncate_long_and_double", False
+ )
batch_size = params.get("input").get("batch_size", 1)
for precision in params.get("runtime").get("precision", "fp32"):
input_tensors = []
num_input = params.get("input").get("num_inputs", 1)
for i in range(num_input):
inp_tensor = params.get("input").get("input" + str(i))
input_tensors.append(
- torch.randint(0, 2, tuple(d for d in inp_tensor), dtype=precision_to_dtype(precision)).cuda()
+ torch.randint(
+ 0,
+ 2,
+ tuple(d for d in inp_tensor),
+ dtype=precision_to_dtype(precision),
+ ).cuda()
)
if is_trt_engine:
print(
"Warning, TensorRT engine file is configured. Please make sure the precision matches with the TRT engine for reliable results"
@@ -393,11 +469,18 @@
model = model.half()
backends = params.get("backend")
# Run inference
status = run(
- model, backends, input_tensors, params, precision, truncate_long_and_double, batch_size, is_trt_engine
+ model,
+ backends,
+ input_tensors,
+ params,
+ precision,
+ truncate_long_and_double,
+ batch_size,
+ is_trt_engine,
)
else:
params = vars(args)
model_name = params["model"]
if os.path.exists(model_name):
@@ -415,16 +498,25 @@
batch_size = params["batch_size"]
is_trt_engine = params["is_trt_engine"]
precisions = parse_precisions(params["precision"])
for precision in precisions:
- input_tensors = parse_inputs(params["inputs"], precision_to_dtype(precision))
+ input_tensors = parse_inputs(
+ params["inputs"], precision_to_dtype(precision)
+ )
if not is_trt_engine and (precision == "fp16" or precision == "half"):
# If model is TensorRT serialized engine then model.half will report failure
model = model.half()
status = run(
- model, backends, input_tensors, params, precision, truncate_long_and_double, batch_size, is_trt_engine
+ model,
+ backends,
+ input_tensors,
+ params,
+ precision,
+ truncate_long_and_double,
+ batch_size,
+ is_trt_engine,
)
# Generate report
print("Model Summary: ", model_name)
summary = pd.DataFrame(results)
Signed-off-by: Dheeraj Peri <peri.dheeraj@gmail.com>
Code conforms to Python style guidelines
Code conforms to C++ style guidelines
Merging this PR as this doesn't affect the library. Please re-open in case you see any issues.
Description
Refactor perf_run.py, add fx2trt backend support, and enable usage via CLI arguments. Added a benchmark.sh script which is used for internal perf regression testing. An example CLI invocation is sketched at the end of this description.
Type of change
Checklist:
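For reference, the new CLI path can be exercised with an invocation along these lines; the flag names come from the argparse definitions shown in this PR, while the model name, input shape, precision list, and report path are placeholder values:

# Sketch only: flags are from perf_run.py's argparse setup in this PR; values are placeholders
python perf_run.py \
  --backends torch,torch_tensorrt,fx2trt,tensorrt \
  --model vgg16 \
  --inputs "(1, 3, 224, 224)@fp32" \
  --batch_size 1 \
  --precision fp32,fp16 \
  --report perf_summary.txt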