feat(//tools/perf): Refactor perf_run.py, add fx2trt backend support, usage via CLI arguments #1254
Conversation
Signed-off-by: dperi <dperi@nvidia.com>
…CLI arguments
Signed-off-by: Dheeraj Peri <peri.dheeraj@gmail.com>
Signed-off-by: Dheeraj Peri <peri.dheeraj@gmail.com>
Signed-off-by: Dheeraj Peri <peri.dheeraj@gmail.com>
Signed-off-by: Dheeraj Peri <peri.dheeraj@gmail.com>
Signed-off-by: Dheeraj Peri <peri.dheeraj@gmail.com>
Signed-off-by: Dheeraj Peri <peri.dheeraj@gmail.com>
Signed-off-by: Dheeraj Peri <peri.dheeraj@gmail.com>
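For context, this change exposes the benchmark runner directly through CLI arguments in addition to the YAML config path. A hypothetical invocation, assembled from the argparse flags visible in the style-check diff below (the model name, input shape, and report path are placeholders, not values taken from this PR):

python perf_run.py --backends torch,torch_tensorrt,tensorrt --model resnet50 --inputs "(1, 3, 224, 224)@fp32" --batch_size 1 --precision fp32,fp16 --report perf_report.txt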
Code conforms to C++ style guidelines
There are some changes that do not conform to Python style guidelines:
--- tools/perf/custom_models.py 2022-08-11 18:17:06.779492 +0000
+++ tools/perf/custom_models.py 2022-08-11 18:20:27.317117 +0000
@@ -1,9 +1,10 @@
import torch
import torch.nn as nn
from transformers import BertModel, BertTokenizer, BertConfig
import torch.nn.functional as F
+
def BertModule():
model_name = "bert-base-uncased"
enc = BertTokenizer.from_pretrained(model_name)
text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
--- tools/perf/hub.py 2022-08-11 18:17:06.779492 +0000
+++ tools/perf/hub.py 2022-08-11 18:20:27.448176 +0000
@@ -15,40 +15,25 @@
# Detect case of no GPU before deserialization of models on GPU
if not torch.cuda.is_available():
raise Exception("No GPU found. Please check if installed torch version is compatible with CUDA version")
# Downloads all model files again if manifest file is not present
-MANIFEST_FILE = 'model_manifest.json'
+MANIFEST_FILE = "model_manifest.json"
BENCHMARK_MODELS = {
- "vgg16": {
- "model": models.vgg16(weights=None),
- "path": "script"
- },
- "resnet50": {
- "model": models.resnet50(weights=None),
- "path": "script"
- },
- "efficientnet_b0": {
- "model": timm.create_model('efficientnet_b0', pretrained=True),
- "path": "script"
- },
- "vit": {
- "model": timm.create_model('vit_base_patch16_224', pretrained=True),
- "path": "script"
- },
- "bert_base_uncased": {
- "model": cm.BertModule(),
- "path": "trace"
- },
+ "vgg16": {"model": models.vgg16(weights=None), "path": "script"},
+ "resnet50": {"model": models.resnet50(weights=None), "path": "script"},
+ "efficientnet_b0": {"model": timm.create_model("efficientnet_b0", pretrained=True), "path": "script"},
+ "vit": {"model": timm.create_model("vit_base_patch16_224", pretrained=True), "path": "script"},
+ "bert_base_uncased": {"model": cm.BertModule(), "path": "trace"},
}
def get(n, m, manifest):
print("Downloading {}".format(n))
- traced_filename = "models/" + n + '_traced.jit.pt'
- script_filename = "models/" + n + '_scripted.jit.pt'
+ traced_filename = "models/" + n + "_traced.jit.pt"
+ script_filename = "models/" + n + "_scripted.jit.pt"
x = torch.ones((1, 3, 300, 300)).cuda()
if n == "bert-base-uncased":
traced_model = m["model"]
torch.jit.save(traced_model, traced_filename)
manifest.update({n: [traced_filename]})
@@ -78,13 +63,15 @@
else:
for n, m in BENCHMARK_MODELS.items():
scripted_filename = "models/" + n + "_scripted.jit.pt"
traced_filename = "models/" + n + "_traced.jit.pt"
# Check if model file exists on disk
- if (m["path"] == "both" and os.path.exists(scripted_filename) and os.path.exists(traced_filename)) or \
- (m["path"] == "script" and os.path.exists(scripted_filename)) or \
- (m["path"] == "trace" and os.path.exists(traced_filename)):
+ if (
+ (m["path"] == "both" and os.path.exists(scripted_filename) and os.path.exists(traced_filename))
+ or (m["path"] == "script" and os.path.exists(scripted_filename))
+ or (m["path"] == "trace" and os.path.exists(traced_filename))
+ ):
print("Skipping {} ".format(n))
continue
manifest = get(n, m, manifest)
@@ -96,31 +83,35 @@
# Check if Manifest file exists or is empty
if not os.path.exists(MANIFEST_FILE) or os.stat(MANIFEST_FILE).st_size == 0:
manifest = {"version": torch_version}
# Creating an empty manifest file for overwriting post setup
- os.system('touch {}'.format(MANIFEST_FILE))
+ os.system("touch {}".format(MANIFEST_FILE))
else:
manifest_exists = True
# Load manifest if already exists
- with open(MANIFEST_FILE, 'r') as f:
+ with open(MANIFEST_FILE, "r") as f:
manifest = json.load(f)
- if manifest['version'] == torch_version:
+ if manifest["version"] == torch_version:
version_matches = True
else:
- print("Torch version: {} mismatches \
+ print(
+ "Torch version: {} mismatches \
with manifest's version: {}. Re-downloading \
- all models".format(torch_version, manifest['version']))
+ all models".format(
+ torch_version, manifest["version"]
+ )
+ )
# Overwrite the manifest version as current torch version
- manifest['version'] = torch_version
+ manifest["version"] = torch_version
download_models(version_matches, manifest)
# Write updated manifest file to disk
- with open(MANIFEST_FILE, 'r+') as f:
+ with open(MANIFEST_FILE, "r+") as f:
data = f.read()
f.seek(0)
record = json.dumps(manifest)
f.write(record)
f.truncate()
--- tools/perf/utils.py 2022-08-11 18:17:06.779492 +0000
+++ tools/perf/utils.py 2022-08-11 18:20:27.479495 +0000
@@ -3,57 +3,46 @@
import custom_models as cm
import torchvision.models as models
import timm
BENCHMARK_MODELS = {
- "vgg16": {
- "model": models.vgg16(pretrained=True),
- "path": "script"
- },
- "resnet50": {
- "model": torch.hub.load('pytorch/vision:v0.9.0', 'resnet50', pretrained=True),
- "path": "script"
- },
- "efficientnet_b0": {
- "model": timm.create_model('efficientnet_b0', pretrained=True),
- "path": "script"
- },
- "vit": {
- "model": timm.create_model('vit_base_patch16_224', pretrained=True),
- "path": "script"
- },
- "bert_base_uncased": {
- "model": cm.BertModule(),
- "path": "trace"
- },
+ "vgg16": {"model": models.vgg16(pretrained=True), "path": "script"},
+ "resnet50": {"model": torch.hub.load("pytorch/vision:v0.9.0", "resnet50", pretrained=True), "path": "script"},
+ "efficientnet_b0": {"model": timm.create_model("efficientnet_b0", pretrained=True), "path": "script"},
+ "vit": {"model": timm.create_model("vit_base_patch16_224", pretrained=True), "path": "script"},
+ "bert_base_uncased": {"model": cm.BertModule(), "path": "trace"},
}
+
def precision_to_dtype(pr):
- if pr == 'fp32':
+ if pr == "fp32":
return torch.float
- elif pr == 'fp16' or pr == 'half':
+ elif pr == "fp16" or pr == "half":
return torch.half
- elif pr == 'int32':
+ elif pr == "int32":
return torch.int32
- elif pr == 'bool':
+ elif pr == "bool":
return torch.bool
else:
return torch.float32
+
def parse_inputs(user_inputs, dtype):
- parsed_inputs = user_inputs.split(';')
+ parsed_inputs = user_inputs.split(";")
torchtrt_inputs = []
for input in parsed_inputs:
input_shape = []
- input_shape_and_dtype = input.split('@')
+ input_shape_and_dtype = input.split("@")
dtype = precision_to_dtype(input_shape_and_dtype[1]) if len(input_shape_and_dtype) == 2 else dtype
- for input_dim in input_shape_and_dtype[0][1:-1].split(','):
+ for input_dim in input_shape_and_dtype[0][1:-1].split(","):
input_shape.append(int(input_dim))
torchtrt_inputs.append(torch.randint(0, 5, input_shape, dtype=dtype).cuda())
return torchtrt_inputs
+
def parse_backends(backends):
- return backends.split(',')
+ return backends.split(",")
+
def parse_precisions(precisions):
- return precisions.split(',')
+ return precisions.split(",")
--- tools/perf/perf_run.py 2022-08-11 18:17:06.779492 +0000
+++ tools/perf/perf_run.py 2022-08-11 18:20:27.591280 +0000
@@ -42,18 +42,21 @@
# Retrieves the value from the configuration else uses default values
def get(self, key, default_value=None):
if not key in self.params:
if not default_value:
- raise ValueError('Key {} is not present and default_value is not configured. Please run it with default value', key)
+ raise ValueError(
+ "Key {} is not present and default_value is not configured. Please run it with default value", key
+ )
self.params[key] = default_value
return self.params[key]
+
# Runs inference using Torch backend
def run_torch(model, input_tensors, params, precision, batch_size):
print("Running Torch for precision: ", precision, " batch_size : ", batch_size)
- iters = params.get('iterations', 20)
+ iters = params.get("iterations", 20)
# Warm up
with torch.no_grad():
for _ in range(WARMUP_ITER):
features = model(*input_tensors)
@@ -69,29 +72,30 @@
end_time = timeit.default_timer()
meas_time = end_time - start_time
timings.append(meas_time)
recordStats("Torch", timings, precision, batch_size)
+
# Runs inference using Torch-TensorRT backend
def run_torch_tensorrt(model, input_tensors, params, precision, truncate_long_and_double, batch_size):
print("Running Torch-TensorRT for precision: ", precision, " batch_size : ", batch_size)
# Compiling Torch-TensorRT model
compile_settings = {
- "inputs": input_tensors,
- "enabled_precisions": {precision_to_dtype(precision)} ,
- "truncate_long_and_double": truncate_long_and_double,
- "min_block_size" : 1,
+ "inputs": input_tensors,
+ "enabled_precisions": {precision_to_dtype(precision)},
+ "truncate_long_and_double": truncate_long_and_double,
+ "min_block_size": 1,
}
- if precision == 'int8':
- compile_settings.update({"calib": params.get('calibration_cache')})
+ if precision == "int8":
+ compile_settings.update({"calib": params.get("calibration_cache")})
with torchtrt.logging.errors():
model = torchtrt.compile(model, **compile_settings)
- iters = params.get('iterations', 20)
+ iters = params.get("iterations", 20)
# Warm up
with torch.no_grad():
for _ in range(WARMUP_ITER):
features = model(*input_tensors)
@@ -106,10 +110,11 @@
end_time = timeit.default_timer()
meas_time = end_time - start_time
timings.append(meas_time)
recordStats("Torch-TensorRT", timings, precision, batch_size)
+
# Runs inference using FX2TRT backend
def run_fx2trt(model, input_tensors, params, precision, batch_size):
print("Running FX2TRT for precision: ", precision, " batch_size : ", batch_size)
if precision == "fp32":
@@ -125,11 +130,11 @@
max_batch_size=batch_size,
lower_precision=precision,
verbose_log=False,
)
- iters = params.get('iterations', 20)
+ iters = params.get("iterations", 20)
# Warm up
with torch.no_grad():
for _ in range(WARMUP_ITER):
features = model(*input_tensors)
@@ -144,10 +149,11 @@
end_time = timeit.default_timer()
meas_time = end_time - start_time
timings.append(meas_time)
recordStats("FX-TensorRT", timings, precision, batch_size)
+
def torch_dtype_from_trt(dtype):
if dtype == trt.int8:
return torch.int8
elif dtype == trt.bool:
@@ -159,20 +165,23 @@
elif dtype == trt.float32:
return torch.float32
else:
raise TypeError("%s is not supported by torch" % dtype)
+
def torch_device_from_trt(device):
if device == trt.TensorLocation.DEVICE:
return torch.device("cuda")
elif device == trt.TensorLocation.HOST:
return torch.device("cpu")
else:
return TypeError("%s is not supported by torch" % device)
-def run_tensorrt(model, input_tensors, params, precision, truncate_long_and_double=False, is_trt_engine=False, batch_size=1):
+def run_tensorrt(
+ model, input_tensors, params, precision, truncate_long_and_double=False, is_trt_engine=False, batch_size=1
+):
engine = None
# If the model file is a TensorRT engine then directly deserialize and run inference
# else convert the torch module to a TensorRT engine first and then run inference
if not is_trt_engine:
@@ -189,16 +198,16 @@
# Deserialize the TensorRT engine
with trt.Logger() as logger, trt.Runtime(logger) as runtime:
engine = runtime.deserialize_cuda_engine(model)
print("Running TensorRT for precision: ", precision, " batch_size : ", batch_size)
- iters = params.get('iterations', 20)
+ iters = params.get("iterations", 20)
# Compiling the bindings
bindings = engine.num_bindings * [None]
k = 0
- for idx,_ in enumerate(bindings):
+ for idx, _ in enumerate(bindings):
dtype = torch_dtype_from_trt(engine.get_binding_dtype(idx))
shape = tuple(engine.get_binding_shape(idx))
device = torch_device_from_trt(engine.get_location(idx))
if not engine.binding_is_input(idx):
# Output bindings
@@ -223,23 +232,26 @@
meas_time = end_time - start_time
timings.append(meas_time)
recordStats("TensorRT", timings, precision, batch_size)
+
# Deploys inference run for different backend configurations
-def run(model, backends, input_tensors, params, precision, truncate_long_and_double=False, batch_size=1, is_trt_engine=False):
+def run(
+ model, backends, input_tensors, params, precision, truncate_long_and_double=False, batch_size=1, is_trt_engine=False
+):
for backend in backends:
- if precision == 'int8':
- if backend == 'all' or backend == 'torch':
+ if precision == "int8":
+ if backend == "all" or backend == "torch":
print("int8 precision is not supported for torch runtime in this script yet")
return False
- if backend == 'all' or backend == 'torch_tensorrt' or params.get('calibration_cache', None) == None:
+ if backend == "all" or backend == "torch_tensorrt" or params.get("calibration_cache", None) == None:
print("int8 precision expects calibration cache file for inference")
return False
- if backend == 'all':
+ if backend == "all":
run_torch(model, input_tensors, params, precision, batch_size)
run_torch_tensorrt(model, input_tensors, params, precision, truncate_long_and_double, batch_size)
run_tensorrt(model, input_tensors, params, precision, truncate_long_and_double, is_trt_engine, batch_size)
elif backend == "torch":
@@ -252,12 +264,13 @@
run_fx2trt(model, input_tensors, params, precision, batch_size)
elif backend == "tensorrt":
run_tensorrt(model, input_tensors, params, precision, truncate_long_and_double, is_trt_engine, batch_size)
+
# Generate report
-def recordStats(backend, timings, precision, batch_size = 1):
+def recordStats(backend, timings, precision, batch_size=1):
times = np.array(timings)
steps = len(times)
speeds = batch_size / times
time_mean = np.mean(times)
time_med = np.median(times)
@@ -265,55 +278,77 @@
time_std = np.std(times, ddof=0)
speed_mean = np.mean(speeds)
speed_med = np.median(speeds)
stats = {
- 'Backend' : backend,
- 'Precision' : precision,
- 'Batch size' : batch_size,
- 'Median(FPS)' : speed_med,
- 'Mean(FPS)' : speed_mean,
- 'Median-Latency(ms)' : time_med,
- 'Mean-Latency(ms)' : time_mean,
+ "Backend": backend,
+ "Precision": precision,
+ "Batch size": batch_size,
+ "Median(FPS)": speed_med,
+ "Mean(FPS)": speed_mean,
+ "Median-Latency(ms)": time_med,
+ "Mean-Latency(ms)": time_mean,
}
results.append(stats)
+
def load_model(params):
model = None
is_trt_engine = False
# Load torch model traced/scripted
- model_file = params.get('model').get('filename')
- try :
- model_name = params.get('model').get('name')
+ model_file = params.get("model").get("filename")
+ try:
+ model_name = params.get("model").get("name")
except:
model_name = model_file
print("Loading model: ", model_file)
- if model_file.endswith('.plan'):
+ if model_file.endswith(".plan"):
is_trt_engine = True
# Read the TensorRT engine file
- with open(model_file, 'rb') as fin:
+ with open(model_file, "rb") as fin:
model = fin.read()
else:
model = torch.jit.load(model_file).cuda()
return model, model_name, is_trt_engine
-if __name__ == '__main__':
+if __name__ == "__main__":
arg_parser = argparse.ArgumentParser(description="Run inference on a model with random input values")
- arg_parser.add_argument("--config", type=str, help="Load YAML based configuration file to run the inference. If this is used other params will be ignored")
+ arg_parser.add_argument(
+ "--config",
+ type=str,
+ help="Load YAML based configuration file to run the inference. If this is used other params will be ignored",
+ )
# The following options are manual user provided settings
- arg_parser.add_argument("--backends", type=str, help="Comma separated string of backends. Eg: torch,torch_tensorrt,tensorrt")
+ arg_parser.add_argument(
+ "--backends", type=str, help="Comma separated string of backends. Eg: torch,torch_tensorrt,tensorrt"
+ )
arg_parser.add_argument("--model", type=str, help="Name of the model file")
- arg_parser.add_argument("--inputs", type=str, help="List of input shapes. Eg: (1, 3, 224, 224)@fp32 for Resnet or (1, 128)@int32;(1, 128)@int32 for BERT")
+ arg_parser.add_argument(
+ "--inputs",
+ type=str,
+ help="List of input shapes. Eg: (1, 3, 224, 224)@fp32 for Resnet or (1, 128)@int32;(1, 128)@int32 for BERT",
+ )
arg_parser.add_argument("--batch_size", type=int, default=1, help="Batch size to build and run")
- arg_parser.add_argument("--precision", default="fp32", type=str, help="Comma separated list of precisions to build TensorRT engine Eg: fp32,fp16")
+ arg_parser.add_argument(
+ "--precision",
+ default="fp32",
+ type=str,
+ help="Comma separated list of precisions to build TensorRT engine Eg: fp32,fp16",
+ )
arg_parser.add_argument("--calibration_cache", type=str, help="Name of the calibration cache file")
arg_parser.add_argument("--device", type=int, help="device id")
- arg_parser.add_argument("--truncate", action='store_true', help="Truncate long and double weights in the network in Torch-TensorRT")
- arg_parser.add_argument("--is_trt_engine", action='store_true', help="Boolean flag to determine if the user provided model is a TRT engine or not")
+ arg_parser.add_argument(
+ "--truncate", action="store_true", help="Truncate long and double weights in the network in Torch-TensorRT"
+ )
+ arg_parser.add_argument(
+ "--is_trt_engine",
+ action="store_true",
+ help="Boolean flag to determine if the user provided model is a TRT engine or not",
+ )
arg_parser.add_argument("--report", type=str, help="Path of the output file where performance summary is written.")
args = arg_parser.parse_args()
cudnn.benchmark = True
# Create random input tensor of certain size
@@ -324,59 +359,69 @@
# Load YAML params
params = parser.read_config()
model, model_name, is_trt_engine = load_model(params)
# Default device is set to 0. Configurable using yaml config file.
- torch.cuda.set_device(params.get('runtime').get('device', 0))
-
- num_input = params.get('input').get('num_inputs')
- truncate_long_and_double = params.get('runtime').get('truncate_long_and_double', False)
- batch_size = params.get('input').get('batch_size', 1)
- for precision in params.get('runtime').get('precision', 'fp32'):
+ torch.cuda.set_device(params.get("runtime").get("device", 0))
+
+ num_input = params.get("input").get("num_inputs")
+ truncate_long_and_double = params.get("runtime").get("truncate_long_and_double", False)
+ batch_size = params.get("input").get("batch_size", 1)
+ for precision in params.get("runtime").get("precision", "fp32"):
input_tensors = []
- num_input = params.get('input').get('num_inputs', 1)
+ num_input = params.get("input").get("num_inputs", 1)
for i in range(num_input):
- inp_tensor = params.get('input').get('input' + str(i))
- input_tensors.append(torch.randint(0, 2, tuple(d for d in inp_tensor), dtype=precision_to_dtype(precision)).cuda())
+ inp_tensor = params.get("input").get("input" + str(i))
+ input_tensors.append(
+ torch.randint(0, 2, tuple(d for d in inp_tensor), dtype=precision_to_dtype(precision)).cuda()
+ )
if is_trt_engine:
- print("Warning, TensorRT engine file is configured. Please make sure the precision matches with the TRT engine for reliable results")
+ print(
+ "Warning, TensorRT engine file is configured. Please make sure the precision matches with the TRT engine for reliable results"
+ )
if not is_trt_engine and (precision == "fp16" or precision == "half"):
# If model is TensorRT serialized engine then model.half will report failure
model = model.half()
- backends = params.get('backend')
+ backends = params.get("backend")
# Run inference
- status = run(model, backends, input_tensors, params, precision, truncate_long_and_double, batch_size, is_trt_engine)
+ status = run(
+ model, backends, input_tensors, params, precision, truncate_long_and_double, batch_size, is_trt_engine
+ )
else:
params = vars(args)
- model_name = params['model']
+ model_name = params["model"]
if os.path.exists(model_name):
print("Loading user provided model: ", model_name)
model = torch.jit.load(model_name).cuda().eval()
elif model_name in BENCHMARK_MODELS:
- model = BENCHMARK_MODELS[model_name]['model'].eval().cuda()
+ model = BENCHMARK_MODELS[model_name]["model"].eval().cuda()
else:
- raise ValueError("Invalid model name. Please provide a torchscript model file or model name (among the following options vgg16|resnet50|efficientnet_b0|vit)")
-
- backends = parse_backends(params['backends'])
- truncate_long_and_double = params['truncate']
- batch_size = params['batch_size']
- is_trt_engine = params['is_trt_engine']
- precisions = parse_precisions(params['precision'])
+ raise ValueError(
+ "Invalid model name. Please provide a torchscript model file or model name (among the following options vgg16|resnet50|efficientnet_b0|vit)"
+ )
+
+ backends = parse_backends(params["backends"])
+ truncate_long_and_double = params["truncate"]
+ batch_size = params["batch_size"]
+ is_trt_engine = params["is_trt_engine"]
+ precisions = parse_precisions(params["precision"])
for precision in precisions:
- input_tensors = parse_inputs(params['inputs'], precision_to_dtype(precision))
+ input_tensors = parse_inputs(params["inputs"], precision_to_dtype(precision))
if not is_trt_engine and (precision == "fp16" or precision == "half"):
# If model is TensorRT serialized engine then model.half will report failure
model = model.half()
- status = run(model, backends, input_tensors, params, precision, truncate_long_and_double, batch_size, is_trt_engine)
+ status = run(
+ model, backends, input_tensors, params, precision, truncate_long_and_double, batch_size, is_trt_engine
+ )
# Generate report
- print('Model Summary: ', model_name)
+ print("Model Summary: ", model_name)
summary = pd.DataFrame(results)
print(summary)
- with open(args.report, 'w') as file:
- file.write('Model Summary: ' + model_name + '\n')
+ with open(args.report, "w") as file:
+ file.write("Model Summary: " + model_name + "\n")
file.write(summary.to_string())
file.close()
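As a quick reference for the helpers in tools/perf/utils.py shown above, here is a minimal sketch of how the CLI strings are parsed, assuming the script is run from tools/perf so that utils is importable and a CUDA device is available (the example strings mirror the argparse help text):

# Hypothetical usage of the parsing helpers from the utils.py diff above.
from utils import parse_backends, parse_precisions, parse_inputs, precision_to_dtype

backends = parse_backends("torch,torch_tensorrt,tensorrt")   # -> ["torch", "torch_tensorrt", "tensorrt"]
precisions = parse_precisions("fp32,fp16")                    # -> ["fp32", "fp16"]

# Each "(shape)@dtype" entry becomes a random tensor of that shape and dtype on the GPU;
# multiple inputs are separated by ";", and a trailing @dtype overrides the default dtype passed in.
bert_inputs = parse_inputs("(1, 128)@int32;(1, 128)@int32", precision_to_dtype("fp32"))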
+ "--precision",
+ default="fp32",
+ type=str,
+ help="Comma separated list of precisions to build TensorRT engine Eg: fp32,fp16",
+ )
arg_parser.add_argument("--calibration_cache", type=str, help="Name of the calibration cache file")
arg_parser.add_argument("--device", type=int, help="device id")
- arg_parser.add_argument("--truncate", action='store_true', help="Truncate long and double weights in the network in Torch-TensorRT")
- arg_parser.add_argument("--is_trt_engine", action='store_true', help="Boolean flag to determine if the user provided model is a TRT engine or not")
+ arg_parser.add_argument(
+ "--truncate", action="store_true", help="Truncate long and double weights in the network in Torch-TensorRT"
+ )
+ arg_parser.add_argument(
+ "--is_trt_engine",
+ action="store_true",
+ help="Boolean flag to determine if the user provided model is a TRT engine or not",
+ )
arg_parser.add_argument("--report", type=str, help="Path of the output file where performance summary is written.")
args = arg_parser.parse_args()
cudnn.benchmark = True
# Create random input tensor of certain size
@@ -324,59 +359,69 @@
# Load YAML params
params = parser.read_config()
model, model_name, is_trt_engine = load_model(params)
# Default device is set to 0. Configurable using yaml config file.
- torch.cuda.set_device(params.get('runtime').get('device', 0))
-
- num_input = params.get('input').get('num_inputs')
- truncate_long_and_double = params.get('runtime').get('truncate_long_and_double', False)
- batch_size = params.get('input').get('batch_size', 1)
- for precision in params.get('runtime').get('precision', 'fp32'):
+ torch.cuda.set_device(params.get("runtime").get("device", 0))
+
+ num_input = params.get("input").get("num_inputs")
+ truncate_long_and_double = params.get("runtime").get("truncate_long_and_double", False)
+ batch_size = params.get("input").get("batch_size", 1)
+ for precision in params.get("runtime").get("precision", "fp32"):
input_tensors = []
- num_input = params.get('input').get('num_inputs', 1)
+ num_input = params.get("input").get("num_inputs", 1)
for i in range(num_input):
- inp_tensor = params.get('input').get('input' + str(i))
- input_tensors.append(torch.randint(0, 2, tuple(d for d in inp_tensor), dtype=precision_to_dtype(precision)).cuda())
+ inp_tensor = params.get("input").get("input" + str(i))
+ input_tensors.append(
+ torch.randint(0, 2, tuple(d for d in inp_tensor), dtype=precision_to_dtype(precision)).cuda()
+ )
if is_trt_engine:
- print("Warning, TensorRT engine file is configured. Please make sure the precision matches with the TRT engine for reliable results")
+ print(
+ "Warning, TensorRT engine file is configured. Please make sure the precision matches with the TRT engine for reliable results"
+ )
if not is_trt_engine and (precision == "fp16" or precision == "half"):
# If model is TensorRT serialized engine then model.half will report failure
model = model.half()
- backends = params.get('backend')
+ backends = params.get("backend")
# Run inference
- status = run(model, backends, input_tensors, params, precision, truncate_long_and_double, batch_size, is_trt_engine)
+ status = run(
+ model, backends, input_tensors, params, precision, truncate_long_and_double, batch_size, is_trt_engine
+ )
else:
params = vars(args)
- model_name = params['model']
+ model_name = params["model"]
if os.path.exists(model_name):
print("Loading user provided model: ", model_name)
model = torch.jit.load(model_name).cuda().eval()
elif model_name in BENCHMARK_MODELS:
- model = BENCHMARK_MODELS[model_name]['model'].eval().cuda()
+ model = BENCHMARK_MODELS[model_name]["model"].eval().cuda()
else:
- raise ValueError("Invalid model name. Please provide a torchscript model file or model name (among the following options vgg16|resnet50|efficientnet_b0|vit)")
-
- backends = parse_backends(params['backends'])
- truncate_long_and_double = params['truncate']
- batch_size = params['batch_size']
- is_trt_engine = params['is_trt_engine']
- precisions = parse_precisions(params['precision'])
+ raise ValueError(
+ "Invalid model name. Please provide a torchscript model file or model name (among the following options vgg16|resnet50|efficientnet_b0|vit)"
+ )
+
+ backends = parse_backends(params["backends"])
+ truncate_long_and_double = params["truncate"]
+ batch_size = params["batch_size"]
+ is_trt_engine = params["is_trt_engine"]
+ precisions = parse_precisions(params["precision"])
for precision in precisions:
- input_tensors = parse_inputs(params['inputs'], precision_to_dtype(precision))
+ input_tensors = parse_inputs(params["inputs"], precision_to_dtype(precision))
if not is_trt_engine and (precision == "fp16" or precision == "half"):
# If model is TensorRT serialized engine then model.half will report failure
model = model.half()
- status = run(model, backends, input_tensors, params, precision, truncate_long_and_double, batch_size, is_trt_engine)
+ status = run(
+ model, backends, input_tensors, params, precision, truncate_long_and_double, batch_size, is_trt_engine
+ )
# Generate report
- print('Model Summary: ', model_name)
+ print("Model Summary: ", model_name)
summary = pd.DataFrame(results)
print(summary)
- with open(args.report, 'w') as file:
- file.write('Model Summary: ' + model_name + '\n')
+ with open(args.report, "w") as file:
+ file.write("Model Summary: " + model_name + "\n")
file.write(summary.to_string())
file.close()
Signed-off-by: Dheeraj Peri <peri.dheeraj@gmail.com>
There are some changes that do not conform to Python style guidelines:
--- tools/perf/custom_models.py 2022-08-11 18:29:37.537228 +0000
+++ tools/perf/custom_models.py 2022-08-11 18:32:55.129614 +0000
@@ -1,9 +1,10 @@
import torch
import torch.nn as nn
from transformers import BertModel, BertTokenizer, BertConfig
import torch.nn.functional as F
+
def BertModule():
model_name = "bert-base-uncased"
enc = BertTokenizer.from_pretrained(model_name)
text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
--- tools/perf/hub.py 2022-08-11 18:29:37.537228 +0000
+++ tools/perf/hub.py 2022-08-11 18:32:55.208488 +0000
@@ -15,40 +15,25 @@
# Detect case of no GPU before deserialization of models on GPU
if not torch.cuda.is_available():
raise Exception("No GPU found. Please check if installed torch version is compatible with CUDA version")
# Downloads all model files again if manifest file is not present
-MANIFEST_FILE = 'model_manifest.json'
+MANIFEST_FILE = "model_manifest.json"
BENCHMARK_MODELS = {
- "vgg16": {
- "model": models.vgg16(weights=None),
- "path": "script"
- },
- "resnet50": {
- "model": models.resnet50(weights=None),
- "path": "script"
- },
- "efficientnet_b0": {
- "model": timm.create_model('efficientnet_b0', pretrained=True),
- "path": "script"
- },
- "vit": {
- "model": timm.create_model('vit_base_patch16_224', pretrained=True),
- "path": "script"
- },
- "bert_base_uncased": {
- "model": cm.BertModule(),
- "path": "trace"
- },
+ "vgg16": {"model": models.vgg16(weights=None), "path": "script"},
+ "resnet50": {"model": models.resnet50(weights=None), "path": "script"},
+ "efficientnet_b0": {"model": timm.create_model("efficientnet_b0", pretrained=True), "path": "script"},
+ "vit": {"model": timm.create_model("vit_base_patch16_224", pretrained=True), "path": "script"},
+ "bert_base_uncased": {"model": cm.BertModule(), "path": "trace"},
}
def get(n, m, manifest):
print("Downloading {}".format(n))
- traced_filename = "models/" + n + '_traced.jit.pt'
- script_filename = "models/" + n + '_scripted.jit.pt'
+ traced_filename = "models/" + n + "_traced.jit.pt"
+ script_filename = "models/" + n + "_scripted.jit.pt"
x = torch.ones((1, 3, 300, 300)).cuda()
if n == "bert-base-uncased":
traced_model = m["model"]
torch.jit.save(traced_model, traced_filename)
manifest.update({n: [traced_filename]})
@@ -78,13 +63,15 @@
else:
for n, m in BENCHMARK_MODELS.items():
scripted_filename = "models/" + n + "_scripted.jit.pt"
traced_filename = "models/" + n + "_traced.jit.pt"
# Check if model file exists on disk
- if (m["path"] == "both" and os.path.exists(scripted_filename) and os.path.exists(traced_filename)) or \
- (m["path"] == "script" and os.path.exists(scripted_filename)) or \
- (m["path"] == "trace" and os.path.exists(traced_filename)):
+ if (
+ (m["path"] == "both" and os.path.exists(scripted_filename) and os.path.exists(traced_filename))
+ or (m["path"] == "script" and os.path.exists(scripted_filename))
+ or (m["path"] == "trace" and os.path.exists(traced_filename))
+ ):
print("Skipping {} ".format(n))
continue
manifest = get(n, m, manifest)
@@ -96,31 +83,35 @@
# Check if Manifest file exists or is empty
if not os.path.exists(MANIFEST_FILE) or os.stat(MANIFEST_FILE).st_size == 0:
manifest = {"version": torch_version}
# Creating an empty manifest file for overwriting post setup
- os.system('touch {}'.format(MANIFEST_FILE))
+ os.system("touch {}".format(MANIFEST_FILE))
else:
manifest_exists = True
# Load manifest if already exists
- with open(MANIFEST_FILE, 'r') as f:
+ with open(MANIFEST_FILE, "r") as f:
manifest = json.load(f)
- if manifest['version'] == torch_version:
+ if manifest["version"] == torch_version:
version_matches = True
else:
- print("Torch version: {} mismatches \
+ print(
+ "Torch version: {} mismatches \
with manifest's version: {}. Re-downloading \
- all models".format(torch_version, manifest['version']))
+ all models".format(
+ torch_version, manifest["version"]
+ )
+ )
# Overwrite the manifest version as current torch version
- manifest['version'] = torch_version
+ manifest["version"] = torch_version
download_models(version_matches, manifest)
# Write updated manifest file to disk
- with open(MANIFEST_FILE, 'r+') as f:
+ with open(MANIFEST_FILE, "r+") as f:
data = f.read()
f.seek(0)
record = json.dumps(manifest)
f.write(record)
f.truncate()
--- tools/perf/utils.py 2022-08-11 18:29:37.537228 +0000
+++ tools/perf/utils.py 2022-08-11 18:32:55.239338 +0000
@@ -3,57 +3,46 @@
import custom_models as cm
import torchvision.models as models
import timm
BENCHMARK_MODELS = {
- "vgg16": {
- "model": models.vgg16(pretrained=True),
- "path": "script"
- },
- "resnet50": {
- "model": torch.hub.load('pytorch/vision:v0.9.0', 'resnet50', pretrained=True),
- "path": "script"
- },
- "efficientnet_b0": {
- "model": timm.create_model('efficientnet_b0', pretrained=True),
- "path": "script"
- },
- "vit": {
- "model": timm.create_model('vit_base_patch16_224', pretrained=True),
- "path": "script"
- },
- "bert_base_uncased": {
- "model": cm.BertModule(),
- "path": "trace"
- },
+ "vgg16": {"model": models.vgg16(pretrained=True), "path": "script"},
+ "resnet50": {"model": torch.hub.load("pytorch/vision:v0.9.0", "resnet50", pretrained=True), "path": "script"},
+ "efficientnet_b0": {"model": timm.create_model("efficientnet_b0", pretrained=True), "path": "script"},
+ "vit": {"model": timm.create_model("vit_base_patch16_224", pretrained=True), "path": "script"},
+ "bert_base_uncased": {"model": cm.BertModule(), "path": "trace"},
}
+
def precision_to_dtype(pr):
- if pr == 'fp32':
+ if pr == "fp32":
return torch.float
- elif pr == 'fp16' or pr == 'half':
+ elif pr == "fp16" or pr == "half":
return torch.half
- elif pr == 'int32':
+ elif pr == "int32":
return torch.int32
- elif pr == 'bool':
+ elif pr == "bool":
return torch.bool
else:
return torch.float32
+
def parse_inputs(user_inputs, dtype):
- parsed_inputs = user_inputs.split(';')
+ parsed_inputs = user_inputs.split(";")
torchtrt_inputs = []
for input in parsed_inputs:
input_shape = []
- input_shape_and_dtype = input.split('@')
+ input_shape_and_dtype = input.split("@")
dtype = precision_to_dtype(input_shape_and_dtype[1]) if len(input_shape_and_dtype) == 2 else dtype
- for input_dim in input_shape_and_dtype[0][1:-1].split(','):
+ for input_dim in input_shape_and_dtype[0][1:-1].split(","):
input_shape.append(int(input_dim))
torchtrt_inputs.append(torch.randint(0, 5, input_shape, dtype=dtype).cuda())
return torchtrt_inputs
+
def parse_backends(backends):
- return backends.split(',')
+ return backends.split(",")
+
def parse_precisions(precisions):
- return precisions.split(',')
+ return precisions.split(",")
--- tools/perf/perf_run.py 2022-08-11 18:29:37.537228 +0000
+++ tools/perf/perf_run.py 2022-08-11 18:32:55.330922 +0000
@@ -42,18 +42,21 @@
# Retrieves the value from the configuration else uses default values
def get(self, key, default_value=None):
if not key in self.params:
if not default_value:
- raise ValueError('Key {} is not present and default_value is not configured. Please run it with default value', key)
+ raise ValueError(
+ "Key {} is not present and default_value is not configured. Please run it with default value", key
+ )
self.params[key] = default_value
return self.params[key]
+
# Runs inference using Torch backend
def run_torch(model, input_tensors, params, precision, batch_size):
print("Running Torch for precision: ", precision, " batch_size : ", batch_size)
- iters = params.get('iterations', 20)
+ iters = params.get("iterations", 20)
# Warm up
with torch.no_grad():
for _ in range(WARMUP_ITER):
features = model(*input_tensors)
@@ -69,29 +72,30 @@
end_time = timeit.default_timer()
meas_time = end_time - start_time
timings.append(meas_time)
recordStats("Torch", timings, precision, batch_size)
+
# Runs inference using Torch-TensorRT backend
def run_torch_tensorrt(model, input_tensors, params, precision, truncate_long_and_double, batch_size):
print("Running Torch-TensorRT for precision: ", precision, " batch_size : ", batch_size)
# Compiling Torch-TensorRT model
compile_settings = {
- "inputs": input_tensors,
- "enabled_precisions": {precision_to_dtype(precision)} ,
- "truncate_long_and_double": truncate_long_and_double,
- "min_block_size" : 1,
+ "inputs": input_tensors,
+ "enabled_precisions": {precision_to_dtype(precision)},
+ "truncate_long_and_double": truncate_long_and_double,
+ "min_block_size": 1,
}
- if precision == 'int8':
- compile_settings.update({"calib": params.get('calibration_cache')})
+ if precision == "int8":
+ compile_settings.update({"calib": params.get("calibration_cache")})
with torchtrt.logging.errors():
model = torchtrt.compile(model, **compile_settings)
- iters = params.get('iterations', 20)
+ iters = params.get("iterations", 20)
# Warm up
with torch.no_grad():
for _ in range(WARMUP_ITER):
features = model(*input_tensors)
@@ -106,10 +110,11 @@
end_time = timeit.default_timer()
meas_time = end_time - start_time
timings.append(meas_time)
recordStats("Torch-TensorRT", timings, precision, batch_size)
+
# Runs inference using FX2TRT backend
def run_fx2trt(model, input_tensors, params, precision, batch_size):
print("Running FX2TRT for precision: ", precision, " batch_size : ", batch_size)
if precision == "fp32":
@@ -125,11 +130,11 @@
max_batch_size=batch_size,
lower_precision=precision,
verbose_log=False,
)
- iters = params.get('iterations', 20)
+ iters = params.get("iterations", 20)
# Warm up
with torch.no_grad():
for _ in range(WARMUP_ITER):
features = model(*input_tensors)
@@ -144,10 +149,11 @@
end_time = timeit.default_timer()
meas_time = end_time - start_time
timings.append(meas_time)
recordStats("FX-TensorRT", timings, precision, batch_size)
+
def torch_dtype_from_trt(dtype):
if dtype == trt.int8:
return torch.int8
elif dtype == trt.bool:
@@ -159,20 +165,23 @@
elif dtype == trt.float32:
return torch.float32
else:
raise TypeError("%s is not supported by torch" % dtype)
+
def torch_device_from_trt(device):
if device == trt.TensorLocation.DEVICE:
return torch.device("cuda")
elif device == trt.TensorLocation.HOST:
return torch.device("cpu")
else:
return TypeError("%s is not supported by torch" % device)
-def run_tensorrt(model, input_tensors, params, precision, truncate_long_and_double=False, is_trt_engine=False, batch_size=1):
+def run_tensorrt(
+ model, input_tensors, params, precision, truncate_long_and_double=False, is_trt_engine=False, batch_size=1
+):
engine = None
# If the model file is a TensorRT engine then directly deserialize and run inference
# else convert the torch module to a TensorRT engine first and then run inference
if not is_trt_engine:
@@ -189,16 +198,16 @@
# Deserialize the TensorRT engine
with trt.Logger() as logger, trt.Runtime(logger) as runtime:
engine = runtime.deserialize_cuda_engine(model)
print("Running TensorRT for precision: ", precision, " batch_size : ", batch_size)
- iters = params.get('iterations', 20)
+ iters = params.get("iterations", 20)
# Compiling the bindings
bindings = engine.num_bindings * [None]
k = 0
- for idx,_ in enumerate(bindings):
+ for idx, _ in enumerate(bindings):
dtype = torch_dtype_from_trt(engine.get_binding_dtype(idx))
shape = tuple(engine.get_binding_shape(idx))
device = torch_device_from_trt(engine.get_location(idx))
if not engine.binding_is_input(idx):
# Output bindings
@@ -223,23 +232,26 @@
meas_time = end_time - start_time
timings.append(meas_time)
recordStats("TensorRT", timings, precision, batch_size)
+
# Deploys inference run for different backend configurations
-def run(model, backends, input_tensors, params, precision, truncate_long_and_double=False, batch_size=1, is_trt_engine=False):
+def run(
+ model, backends, input_tensors, params, precision, truncate_long_and_double=False, batch_size=1, is_trt_engine=False
+):
for backend in backends:
- if precision == 'int8':
- if backend == 'all' or backend == 'torch':
+ if precision == "int8":
+ if backend == "all" or backend == "torch":
print("int8 precision is not supported for torch runtime in this script yet")
return False
- if backend == 'all' or backend == 'torch_tensorrt' or params.get('calibration_cache', None) == None:
+ if backend == "all" or backend == "torch_tensorrt" or params.get("calibration_cache", None) == None:
print("int8 precision expects calibration cache file for inference")
return False
- if backend == 'all':
+ if backend == "all":
run_torch(model, input_tensors, params, precision, batch_size)
run_torch_tensorrt(model, input_tensors, params, precision, truncate_long_and_double, batch_size)
run_tensorrt(model, input_tensors, params, precision, truncate_long_and_double, is_trt_engine, batch_size)
elif backend == "torch":
@@ -252,12 +264,13 @@
run_fx2trt(model, input_tensors, params, precision, batch_size)
elif backend == "tensorrt":
run_tensorrt(model, input_tensors, params, precision, truncate_long_and_double, is_trt_engine, batch_size)
+
# Generate report
-def recordStats(backend, timings, precision, batch_size = 1):
+def recordStats(backend, timings, precision, batch_size=1):
times = np.array(timings)
steps = len(times)
speeds = batch_size / times
time_mean = np.mean(times)
time_med = np.median(times)
@@ -265,55 +278,77 @@
time_std = np.std(times, ddof=0)
speed_mean = np.mean(speeds)
speed_med = np.median(speeds)
stats = {
- 'Backend' : backend,
- 'Precision' : precision,
- 'Batch size' : batch_size,
- 'Median(FPS)' : speed_med,
- 'Mean(FPS)' : speed_mean,
- 'Median-Latency(ms)' : time_med,
- 'Mean-Latency(ms)' : time_mean,
+ "Backend": backend,
+ "Precision": precision,
+ "Batch size": batch_size,
+ "Median(FPS)": speed_med,
+ "Mean(FPS)": speed_mean,
+ "Median-Latency(ms)": time_med,
+ "Mean-Latency(ms)": time_mean,
}
results.append(stats)
+
def load_model(params):
model = None
is_trt_engine = False
# Load torch model traced/scripted
- model_file = params.get('model').get('filename')
- try :
- model_name = params.get('model').get('name')
+ model_file = params.get("model").get("filename")
+ try:
+ model_name = params.get("model").get("name")
except:
model_name = model_file
print("Loading model: ", model_file)
- if model_file.endswith('.plan'):
+ if model_file.endswith(".plan"):
is_trt_engine = True
# Read the TensorRT engine file
- with open(model_file, 'rb') as fin:
+ with open(model_file, "rb") as fin:
model = fin.read()
else:
model = torch.jit.load(model_file).cuda()
return model, model_name, is_trt_engine
-if __name__ == '__main__':
+if __name__ == "__main__":
arg_parser = argparse.ArgumentParser(description="Run inference on a model with random input values")
- arg_parser.add_argument("--config", type=str, help="Load YAML based configuration file to run the inference. If this is used other params will be ignored")
+ arg_parser.add_argument(
+ "--config",
+ type=str,
+ help="Load YAML based configuration file to run the inference. If this is used other params will be ignored",
+ )
# The following options are manual user provided settings
- arg_parser.add_argument("--backends", type=str, help="Comma separated string of backends. Eg: torch,torch_tensorrt,fx2trt,tensorrt")
+ arg_parser.add_argument(
+ "--backends", type=str, help="Comma separated string of backends. Eg: torch,torch_tensorrt,fx2trt,tensorrt"
+ )
arg_parser.add_argument("--model", type=str, help="Name of the model file")
- arg_parser.add_argument("--inputs", type=str, help="List of input shapes. Eg: (1, 3, 224, 224)@fp32 for Resnet or (1, 128)@int32;(1, 128)@int32 for BERT")
+ arg_parser.add_argument(
+ "--inputs",
+ type=str,
+ help="List of input shapes. Eg: (1, 3, 224, 224)@fp32 for Resnet or (1, 128)@int32;(1, 128)@int32 for BERT",
+ )
arg_parser.add_argument("--batch_size", type=int, default=1, help="Batch size to build and run")
- arg_parser.add_argument("--precision", default="fp32", type=str, help="Comma separated list of precisions to build TensorRT engine Eg: fp32,fp16")
+ arg_parser.add_argument(
+ "--precision",
+ default="fp32",
+ type=str,
+ help="Comma separated list of precisions to build TensorRT engine Eg: fp32,fp16",
+ )
arg_parser.add_argument("--calibration_cache", type=str, help="Name of the calibration cache file")
arg_parser.add_argument("--device", type=int, help="device id")
- arg_parser.add_argument("--truncate", action='store_true', help="Truncate long and double weights in the network in Torch-TensorRT")
- arg_parser.add_argument("--is_trt_engine", action='store_true', help="Boolean flag to determine if the user provided model is a TRT engine or not")
+ arg_parser.add_argument(
+ "--truncate", action="store_true", help="Truncate long and double weights in the network in Torch-TensorRT"
+ )
+ arg_parser.add_argument(
+ "--is_trt_engine",
+ action="store_true",
+ help="Boolean flag to determine if the user provided model is a TRT engine or not",
+ )
arg_parser.add_argument("--report", type=str, help="Path of the output file where performance summary is written.")
args = arg_parser.parse_args()
cudnn.benchmark = True
# Create random input tensor of certain size
@@ -324,59 +359,69 @@
# Load YAML params
params = parser.read_config()
model, model_name, is_trt_engine = load_model(params)
# Default device is set to 0. Configurable using yaml config file.
- torch.cuda.set_device(params.get('runtime').get('device', 0))
-
- num_input = params.get('input').get('num_inputs')
- truncate_long_and_double = params.get('runtime').get('truncate_long_and_double', False)
- batch_size = params.get('input').get('batch_size', 1)
- for precision in params.get('runtime').get('precision', 'fp32'):
+ torch.cuda.set_device(params.get("runtime").get("device", 0))
+
+ num_input = params.get("input").get("num_inputs")
+ truncate_long_and_double = params.get("runtime").get("truncate_long_and_double", False)
+ batch_size = params.get("input").get("batch_size", 1)
+ for precision in params.get("runtime").get("precision", "fp32"):
input_tensors = []
- num_input = params.get('input').get('num_inputs', 1)
+ num_input = params.get("input").get("num_inputs", 1)
for i in range(num_input):
- inp_tensor = params.get('input').get('input' + str(i))
- input_tensors.append(torch.randint(0, 2, tuple(d for d in inp_tensor), dtype=precision_to_dtype(precision)).cuda())
+ inp_tensor = params.get("input").get("input" + str(i))
+ input_tensors.append(
+ torch.randint(0, 2, tuple(d for d in inp_tensor), dtype=precision_to_dtype(precision)).cuda()
+ )
if is_trt_engine:
- print("Warning, TensorRT engine file is configured. Please make sure the precision matches with the TRT engine for reliable results")
+ print(
+ "Warning, TensorRT engine file is configured. Please make sure the precision matches with the TRT engine for reliable results"
+ )
if not is_trt_engine and (precision == "fp16" or precision == "half"):
# If model is TensorRT serialized engine then model.half will report failure
model = model.half()
- backends = params.get('backend')
+ backends = params.get("backend")
# Run inference
- status = run(model, backends, input_tensors, params, precision, truncate_long_and_double, batch_size, is_trt_engine)
+ status = run(
+ model, backends, input_tensors, params, precision, truncate_long_and_double, batch_size, is_trt_engine
+ )
else:
params = vars(args)
- model_name = params['model']
+ model_name = params["model"]
if os.path.exists(model_name):
print("Loading user provided model: ", model_name)
model = torch.jit.load(model_name).cuda().eval()
elif model_name in BENCHMARK_MODELS:
- model = BENCHMARK_MODELS[model_name]['model'].eval().cuda()
+ model = BENCHMARK_MODELS[model_name]["model"].eval().cuda()
else:
- raise ValueError("Invalid model name. Please provide a torchscript model file or model name (among the following options vgg16|resnet50|efficientnet_b0|vit)")
-
- backends = parse_backends(params['backends'])
- truncate_long_and_double = params['truncate']
- batch_size = params['batch_size']
- is_trt_engine = params['is_trt_engine']
- precisions = parse_precisions(params['precision'])
+ raise ValueError(
+ "Invalid model name. Please provide a torchscript model file or model name (among the following options vgg16|resnet50|efficientnet_b0|vit)"
+ )
+
+ backends = parse_backends(params["backends"])
+ truncate_long_and_double = params["truncate"]
+ batch_size = params["batch_size"]
+ is_trt_engine = params["is_trt_engine"]
+ precisions = parse_precisions(params["precision"])
for precision in precisions:
- input_tensors = parse_inputs(params['inputs'], precision_to_dtype(precision))
+ input_tensors = parse_inputs(params["inputs"], precision_to_dtype(precision))
if not is_trt_engine and (precision == "fp16" or precision == "half"):
# If model is TensorRT serialized engine then model.half will report failure
model = model.half()
- status = run(model, backends, input_tensors, params, precision, truncate_long_and_double, batch_size, is_trt_engine)
+ status = run(
+ model, backends, input_tensors, params, precision, truncate_long_and_double, batch_size, is_trt_engine
+ )
# Generate report
- print('Model Summary: ', model_name)
+ print("Model Summary: ", model_name)
summary = pd.DataFrame(results)
print(summary)
- with open(args.report, 'w') as file:
- file.write('Model Summary: ' + model_name + '\n')
+ with open(args.report, "w") as file:
+ file.write("Model Summary: " + model_name + "\n")
file.write(summary.to_string())
file.close()
Code conforms to C++ style guidelines
I think this branch needs to be rebased; it seems like the pre-commit hooks were not run.
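For reference, a way to apply the repository's hooks locally before pushing is sketched below; this assumes the standard pre-commit tooling is configured here and that upstream/master is the rebase target (both are assumptions, not taken from this PR):

# Assumed workflow; remote and branch names are placeholders
pip install pre-commit
pre-commit run --all-files
git fetch upstream
git rebase upstream/master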
Signed-off-by: Dheeraj Peri <peri.dheeraj@gmail.com>
Code conforms to Python style guidelines
There are some changes that do not conform to C++ style guidelines:
diff --git a/workspace/core/conversion/converters/impl/element_wise.cpp b/tmp/changes.txt
old mode 100755
new mode 100644
ERROR: Some files do not conform to style guidelines
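The only change flagged here is a file-mode switch (from 100755 to 100644) on element_wise.cpp. A possible fix, assuming the check expects the non-executable 100644 mode, is sketched below; the path is taken from the diff header above:

# Sketch of a possible fix: clear the executable bit so git records mode 100644 again
chmod -x core/conversion/converters/impl/element_wise.cpp
git add core/conversion/converters/impl/element_wise.cpp
git commit -s -m "chore: restore non-executable file mode"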
Signed-off-by: Dheeraj Peri <peri.dheeraj@gmail.com>
Signed-off-by: Dheeraj Peri <peri.dheeraj@gmail.com>
Signed-off-by: Dheeraj Peri <peri.dheeraj@gmail.com>
Signed-off-by: Dheeraj Peri <peri.dheeraj@gmail.com>
Code conforms to C++ style guidelines
There are some changes that do not conform to Python style guidelines:
--- tools/perf/hub.py 2022-09-08 17:45:26.704885 +0000
+++ tools/perf/hub.py 2022-09-08 17:46:00.411687 +0000
@@ -12,20 +12,28 @@
torch_version = torch.__version__
# Detect case of no GPU before deserialization of models on GPU
if not torch.cuda.is_available():
- raise Exception("No GPU found. Please check if installed torch version is compatible with CUDA version")
+ raise Exception(
+ "No GPU found. Please check if installed torch version is compatible with CUDA version"
+ )
# Downloads all model files again if manifest file is not present
MANIFEST_FILE = "model_manifest.json"
BENCHMARK_MODELS = {
"vgg16": {"model": models.vgg16(weights=None), "path": "script"},
"resnet50": {"model": models.resnet50(weights=None), "path": "script"},
- "efficientnet_b0": {"model": timm.create_model("efficientnet_b0", pretrained=True), "path": "script"},
- "vit": {"model": timm.create_model("vit_base_patch16_224", pretrained=True), "path": "script"},
+ "efficientnet_b0": {
+ "model": timm.create_model("efficientnet_b0", pretrained=True),
+ "path": "script",
+ },
+ "vit": {
+ "model": timm.create_model("vit_base_patch16_224", pretrained=True),
+ "path": "script",
+ },
"bert_base_uncased": {"model": cm.BertModule(), "path": "trace"},
}
def get(n, m, manifest):
@@ -64,11 +72,15 @@
for n, m in BENCHMARK_MODELS.items():
scripted_filename = "models/" + n + "_scripted.jit.pt"
traced_filename = "models/" + n + "_traced.jit.pt"
# Check if model file exists on disk
if (
- (m["path"] == "both" and os.path.exists(scripted_filename) and os.path.exists(traced_filename))
+ (
+ m["path"] == "both"
+ and os.path.exists(scripted_filename)
+ and os.path.exists(traced_filename)
+ )
or (m["path"] == "script" and os.path.exists(scripted_filename))
or (m["path"] == "trace" and os.path.exists(traced_filename))
):
print("Skipping {} ".format(n))
continue
--- tools/perf/utils.py 2022-09-08 17:45:26.704885 +0000
+++ tools/perf/utils.py 2022-09-08 17:46:00.464733 +0000
@@ -4,13 +4,22 @@
import torchvision.models as models
import timm
BENCHMARK_MODELS = {
"vgg16": {"model": models.vgg16(pretrained=True), "path": "script"},
- "resnet50": {"model": torch.hub.load("pytorch/vision:v0.9.0", "resnet50", pretrained=True), "path": "script"},
- "efficientnet_b0": {"model": timm.create_model("efficientnet_b0", pretrained=True), "path": "script"},
- "vit": {"model": timm.create_model("vit_base_patch16_224", pretrained=True), "path": "script"},
+ "resnet50": {
+ "model": torch.hub.load("pytorch/vision:v0.9.0", "resnet50", pretrained=True),
+ "path": "script",
+ },
+ "efficientnet_b0": {
+ "model": timm.create_model("efficientnet_b0", pretrained=True),
+ "path": "script",
+ },
+ "vit": {
+ "model": timm.create_model("vit_base_patch16_224", pretrained=True),
+ "path": "script",
+ },
"bert_base_uncased": {"model": cm.BertModule(), "path": "trace"},
}
def precision_to_dtype(pr):
@@ -30,11 +39,15 @@
parsed_inputs = user_inputs.split(";")
torchtrt_inputs = []
for input in parsed_inputs:
input_shape = []
input_shape_and_dtype = input.split("@")
- dtype = precision_to_dtype(input_shape_and_dtype[1]) if len(input_shape_and_dtype) == 2 else dtype
+ dtype = (
+ precision_to_dtype(input_shape_and_dtype[1])
+ if len(input_shape_and_dtype) == 2
+ else dtype
+ )
for input_dim in input_shape_and_dtype[0][1:-1].split(","):
input_shape.append(int(input_dim))
torchtrt_inputs.append(torch.randint(0, 5, input_shape, dtype=dtype).cuda())
return torchtrt_inputs
--- tools/perf/perf_run.py 2022-09-08 17:45:26.704885 +0000
+++ tools/perf/perf_run.py 2022-09-08 17:46:00.602583 +0000
@@ -17,11 +17,17 @@
import torch_tensorrt as torchtrt
from torch_tensorrt.fx.lower import compile
from torch_tensorrt.fx.utils import LowerPrecision
import tensorrt as trt
-from utils import parse_inputs, parse_backends, precision_to_dtype, parse_precisions, BENCHMARK_MODELS
+from utils import (
+ parse_inputs,
+ parse_backends,
+ precision_to_dtype,
+ parse_precisions,
+ BENCHMARK_MODELS,
+)
WARMUP_ITER = 10
results = []
# YAML Parser class for parsing the run configurations
@@ -43,11 +49,12 @@
# Retrieves the value from the configuration else uses default values
def get(self, key, default_value=None):
if not key in self.params:
if not default_value:
raise ValueError(
- "Key {} is not present and default_value is not configured. Please run it with default value", key
+ "Key {} is not present and default_value is not configured. Please run it with default value",
+ key,
)
self.params[key] = default_value
return self.params[key]
@@ -75,12 +82,19 @@
recordStats("Torch", timings, precision, batch_size)
# Runs inference using Torch-TensorRT backend
-def run_torch_tensorrt(model, input_tensors, params, precision, truncate_long_and_double, batch_size):
- print("Running Torch-TensorRT for precision: ", precision, " batch_size : ", batch_size)
+def run_torch_tensorrt(
+ model, input_tensors, params, precision, truncate_long_and_double, batch_size
+):
+ print(
+ "Running Torch-TensorRT for precision: ",
+ precision,
+ " batch_size : ",
+ batch_size,
+ )
# Compiling Torch-TensorRT model
compile_settings = {
"inputs": input_tensors,
"enabled_precisions": {precision_to_dtype(precision)},
"truncate_long_and_double": truncate_long_and_double,
@@ -174,11 +188,17 @@
else:
return TypeError("%s is not supported by torch" % device)
def run_tensorrt(
- model, input_tensors, params, precision, truncate_long_and_double=False, is_trt_engine=False, batch_size=1
+ model,
+ input_tensors,
+ params,
+ precision,
+ truncate_long_and_double=False,
+ is_trt_engine=False,
+ batch_size=1,
):
engine = None
# If the model file is a TensorRT engine then directly deserialize and run inference
# else convert the torch module to a TensorRT engine first and then run inference
@@ -235,11 +255,18 @@
recordStats("TensorRT", timings, precision, batch_size)
# Deploys inference run for different backend configurations
def run(
- model, backends, input_tensors, params, precision, truncate_long_and_double=False, batch_size=1, is_trt_engine=False
+ model,
+ backends,
+ input_tensors,
+ params,
+ precision,
+ truncate_long_and_double=False,
+ batch_size=1,
+ is_trt_engine=False,
):
for backend in backends:
if precision == "int8":
if backend == "all" or backend == "torch":
print(
@@ -255,24 +282,54 @@
print("int8 precision expects calibration cache file for inference")
return False
if backend == "all":
run_torch(model, input_tensors, params, precision, batch_size)
- run_torch_tensorrt(model, input_tensors, params, precision, truncate_long_and_double, batch_size)
- run_tensorrt(model, input_tensors, params, precision, truncate_long_and_double, is_trt_engine, batch_size)
+ run_torch_tensorrt(
+ model,
+ input_tensors,
+ params,
+ precision,
+ truncate_long_and_double,
+ batch_size,
+ )
+ run_tensorrt(
+ model,
+ input_tensors,
+ params,
+ precision,
+ truncate_long_and_double,
+ is_trt_engine,
+ batch_size,
+ )
elif backend == "torch":
run_torch(model, input_tensors, params, precision, batch_size)
elif backend == "torch_tensorrt":
- run_torch_tensorrt(model, input_tensors, params, precision, truncate_long_and_double, batch_size)
+ run_torch_tensorrt(
+ model,
+ input_tensors,
+ params,
+ precision,
+ truncate_long_and_double,
+ batch_size,
+ )
elif backend == "fx2trt":
run_fx2trt(model, input_tensors, params, precision, batch_size)
elif backend == "tensorrt":
- run_tensorrt(model, input_tensors, params, precision, truncate_long_and_double, is_trt_engine, batch_size)
+ run_tensorrt(
+ model,
+ input_tensors,
+ params,
+ precision,
+ truncate_long_and_double,
+ is_trt_engine,
+ batch_size,
+ )
# Generate report
def recordStats(backend, timings, precision, batch_size=1):
times = np.array(timings)
@@ -289,12 +346,12 @@
"Backend": backend,
"Precision": precision,
"Batch size": batch_size,
"Median(FPS)": speed_med,
"Mean(FPS)": speed_mean,
- "Median-Latency(ms)": time_med*1000,
- "Mean-Latency(ms)": time_mean*1000,
+ "Median-Latency(ms)": time_med * 1000,
+ "Mean-Latency(ms)": time_mean * 1000,
}
results.append(stats)
def load_model(params):
@@ -328,36 +385,48 @@
type=str,
help="Load YAML based configuration file to run the inference. If this is used other params will be ignored",
)
# The following options are manual user provided settings
arg_parser.add_argument(
- "--backends", type=str, help="Comma separated string of backends. Eg: torch,torch_tensorrt,fx2trt,tensorrt"
+ "--backends",
+ type=str,
+ help="Comma separated string of backends. Eg: torch,torch_tensorrt,fx2trt,tensorrt",
)
arg_parser.add_argument("--model", type=str, help="Name of the model file")
arg_parser.add_argument(
"--inputs",
type=str,
help="List of input shapes. Eg: (1, 3, 224, 224)@fp32 for Resnet or (1, 128)@int32;(1, 128)@int32 for BERT",
)
- arg_parser.add_argument("--batch_size", type=int, default=1, help="Batch size to build and run")
+ arg_parser.add_argument(
+ "--batch_size", type=int, default=1, help="Batch size to build and run"
+ )
arg_parser.add_argument(
"--precision",
default="fp32",
type=str,
help="Comma separated list of precisions to build TensorRT engine Eg: fp32,fp16",
)
- arg_parser.add_argument("--calibration_cache", type=str, help="Name of the calibration cache file")
+ arg_parser.add_argument(
+ "--calibration_cache", type=str, help="Name of the calibration cache file"
+ )
arg_parser.add_argument("--device", type=int, help="device id")
arg_parser.add_argument(
- "--truncate", action="store_true", help="Truncate long and double weights in the network in Torch-TensorRT"
+ "--truncate",
+ action="store_true",
+ help="Truncate long and double weights in the network in Torch-TensorRT",
)
arg_parser.add_argument(
"--is_trt_engine",
action="store_true",
help="Boolean flag to determine if the user provided model is a TRT engine or not",
)
- arg_parser.add_argument("--report", type=str, help="Path of the output file where performance summary is written.")
+ arg_parser.add_argument(
+ "--report",
+ type=str,
+ help="Path of the output file where performance summary is written.",
+ )
args = arg_parser.parse_args()
cudnn.benchmark = True
# Create random input tensor of certain size
torch.manual_seed(12345)
@@ -370,19 +439,26 @@
# Default device is set to 0. Configurable using yaml config file.
torch.cuda.set_device(params.get("runtime").get("device", 0))
num_input = params.get("input").get("num_inputs")
- truncate_long_and_double = params.get("runtime").get("truncate_long_and_double", False)
+ truncate_long_and_double = params.get("runtime").get(
+ "truncate_long_and_double", False
+ )
batch_size = params.get("input").get("batch_size", 1)
for precision in params.get("runtime").get("precision", "fp32"):
input_tensors = []
num_input = params.get("input").get("num_inputs", 1)
for i in range(num_input):
inp_tensor = params.get("input").get("input" + str(i))
input_tensors.append(
- torch.randint(0, 2, tuple(d for d in inp_tensor), dtype=precision_to_dtype(precision)).cuda()
+ torch.randint(
+ 0,
+ 2,
+ tuple(d for d in inp_tensor),
+ dtype=precision_to_dtype(precision),
+ ).cuda()
)
if is_trt_engine:
print(
"Warning, TensorRT engine file is configured. Please make sure the precision matches with the TRT engine for reliable results"
@@ -393,11 +469,18 @@
model = model.half()
backends = params.get("backend")
# Run inference
status = run(
- model, backends, input_tensors, params, precision, truncate_long_and_double, batch_size, is_trt_engine
+ model,
+ backends,
+ input_tensors,
+ params,
+ precision,
+ truncate_long_and_double,
+ batch_size,
+ is_trt_engine,
)
else:
params = vars(args)
model_name = params["model"]
if os.path.exists(model_name):
@@ -415,16 +498,25 @@
batch_size = params["batch_size"]
is_trt_engine = params["is_trt_engine"]
precisions = parse_precisions(params["precision"])
for precision in precisions:
- input_tensors = parse_inputs(params["inputs"], precision_to_dtype(precision))
+ input_tensors = parse_inputs(
+ params["inputs"], precision_to_dtype(precision)
+ )
if not is_trt_engine and (precision == "fp16" or precision == "half"):
# If model is TensorRT serialized engine then model.half will report failure
model = model.half()
status = run(
- model, backends, input_tensors, params, precision, truncate_long_and_double, batch_size, is_trt_engine
+ model,
+ backends,
+ input_tensors,
+ params,
+ precision,
+ truncate_long_and_double,
+ batch_size,
+ is_trt_engine,
)
# Generate report
print("Model Summary: ", model_name)
summary = pd.DataFrame(results)
Signed-off-by: Dheeraj Peri <peri.dheeraj@gmail.com>
Code conforms to Python style guidelines
Code conforms to C++ style guidelines
Merging this PR as this doesn't affect the library. Please re-open in case you see any issues.
Description
Refactor perf_run.py, add fx2trt backend support, and enable usage via CLI arguments. Added a benchmark.sh script which is used for internal perf regression testing. An example CLI invocation is sketched at the end of this description.
Type of change
Checklist:
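For reference, the new CLI path can be exercised with an invocation along these lines; the flag names come from the argparse definitions shown in this PR, while the model name, input shape, precision list, and report path are placeholder values:

# Sketch only: flags are from perf_run.py's argparse setup in this PR; values are placeholders
python perf_run.py \
  --backends torch,torch_tensorrt,fx2trt,tensorrt \
  --model vgg16 \
  --inputs "(1, 3, 224, 224)@fp32" \
  --batch_size 1 \
  --precision fp32,fp16 \
  --report perf_summary.txt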