fix: fix the fallback related issue after merging collection #1206
Conversation
Signed-off-by: Bo Wang <bowa@nvidia.com>
There are some changes that do not conform to C++ style guidelines:
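The diff below shows the formatting clang-format would apply. As a rough sketch for reproducing the check locally, assuming clang-format is installed and the repository's .clang-format configuration is picked up via --style=file (the project may also ship its own lint script, which would take precedence), the touched files can be reformatted in place with something like:

    clang-format --style=file -i py/torch_tensorrt/csrc/torch_tensorrt_py.cpp py/torch_tensorrt/csrc/tensorrt_classes.cpp

Paths here are repo-relative guesses based on the workspace/ paths in the diff; dropping -i prints the formatted output to stdout so it can be diffed against the working tree instead of overwriting it.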
diff --git a/workspace/py/torch_tensorrt/csrc/torch_tensorrt_py.cpp b/tmp/changes.txt
index 6247789..6b1ffd4 100644
--- a/workspace/py/torch_tensorrt/csrc/torch_tensorrt_py.cpp
+++ b/tmp/changes.txt
@@ -1,8 +1,8 @@
#include "pybind11/pybind11.h"
#include "pybind11/stl.h"
-#include "Python.h"
#include "ATen/core/jit_type.h"
+#include "Python.h"
#include "core/compiler.h"
#include "core/conversion/conversion.h"
#include "tensorrt_classes.h"
@@ -182,7 +182,8 @@ PYBIND11_MODULE(_C, m) {
py::class_<InputSignature>(m, "InputSignature")
.def(pybind11::init([](py::object py_obj) {
InputSignature input_signature;
- input_signature.signature_ivalue = torch::jit::toIValue(std::move(py_obj), c10::PyObjectType::get(), c10::nullopt);
+ input_signature.signature_ivalue =
+ torch::jit::toIValue(std::move(py_obj), c10::PyObjectType::get(), c10::nullopt);
return input_signature;
}))
.def("__str__", &InputSignature::to_str)
diff --git a/workspace/py/torch_tensorrt/csrc/tensorrt_classes.cpp b/tmp/changes.txt
index 9eb58b3..bccfdc0 100644
--- a/workspace/py/torch_tensorrt/csrc/tensorrt_classes.cpp
+++ b/tmp/changes.txt
@@ -191,40 +191,40 @@ std::string TorchFallback::to_str() {
}
void to_internal_input_signature(torch::jit::IValue input_ivalue, torch::jit::IValue& converted_ivalue) {
- if (input_ivalue.isTuple()) {
- auto input_tuple = input_ivalue.toTuple();
- std::vector<torch::jit::IValue> converted_elements;
- for (auto item: input_tuple->elements()) {
- torch::jit::IValue converted_item;
- to_internal_input_signature(item, converted_item);
- converted_elements.push_back(converted_item);
- auto tuple_ptr = c10::ivalue::Tuple::create(converted_elements);
- converted_ivalue = torch::jit::IValue(tuple_ptr);
- }
- } else if(input_ivalue.isList()) {
- auto input_list = input_ivalue.toList().vec();
- c10::TypePtr type = input_list[0].type();
- auto converted_elements = c10::impl::GenericList(type);
- for (auto item: input_list) {
- torch::jit::IValue converted_item;
- to_internal_input_signature(item, converted_item);
- converted_elements.push_back(converted_item);
- }
- converted_ivalue = torch::jit::IValue(converted_elements);
- } else if(input_ivalue.isCustomClass()) {
- core::ir::Input cur_input = (*(input_ivalue.toCustomClass<Input>())).toInternalInput();
- converted_ivalue = torch::jit::IValue(std::move(c10::make_intrusive<core::ir::Input>(cur_input)));
- } else if(input_ivalue.isPyObject()) {
- auto py_object_holder = input_ivalue.toPyObjectHolder();
- auto infer_type = py_object_holder->tryToInferType();
- auto type = infer_type.type();
- torch::jit::IValue ival = py_object_holder->toIValue(type);
+ if (input_ivalue.isTuple()) {
+ auto input_tuple = input_ivalue.toTuple();
+ std::vector<torch::jit::IValue> converted_elements;
+ for (auto item : input_tuple->elements()) {
torch::jit::IValue converted_item;
- to_internal_input_signature(ival, converted_item);
- converted_ivalue = torch::jit::IValue(converted_item);
- } else {
- LOG_ERROR("Unknown input spec type");
+ to_internal_input_signature(item, converted_item);
+ converted_elements.push_back(converted_item);
+ auto tuple_ptr = c10::ivalue::Tuple::create(converted_elements);
+ converted_ivalue = torch::jit::IValue(tuple_ptr);
}
+ } else if (input_ivalue.isList()) {
+ auto input_list = input_ivalue.toList().vec();
+ c10::TypePtr type = input_list[0].type();
+ auto converted_elements = c10::impl::GenericList(type);
+ for (auto item : input_list) {
+ torch::jit::IValue converted_item;
+ to_internal_input_signature(item, converted_item);
+ converted_elements.push_back(converted_item);
+ }
+ converted_ivalue = torch::jit::IValue(converted_elements);
+ } else if (input_ivalue.isCustomClass()) {
+ core::ir::Input cur_input = (*(input_ivalue.toCustomClass<Input>())).toInternalInput();
+ converted_ivalue = torch::jit::IValue(std::move(c10::make_intrusive<core::ir::Input>(cur_input)));
+ } else if (input_ivalue.isPyObject()) {
+ auto py_object_holder = input_ivalue.toPyObjectHolder();
+ auto infer_type = py_object_holder->tryToInferType();
+ auto type = infer_type.type();
+ torch::jit::IValue ival = py_object_holder->toIValue(type);
+ torch::jit::IValue converted_item;
+ to_internal_input_signature(ival, converted_item);
+ converted_ivalue = torch::jit::IValue(converted_item);
+ } else {
+ LOG_ERROR("Unknown input spec type");
+ }
}
core::CompileSpec init_compile_spec(CompileSpec external) {
@@ -281,11 +281,17 @@ core::CompileSpec CompileSpec::toInternalCompileSpec() {
info.convert_info.engine_settings.num_avg_timing_iters = num_avg_timing_iters;
TORCHTRT_CHECK(workspace_size >= 0, "workspace_size must be 0 or greater");
info.convert_info.engine_settings.workspace_size = workspace_size;
- TORCHTRT_CHECK(dla_sram_size >= 4096, "DLA managed SRAM size must be at least 4 KiB and must be a power of 2. This defaults to 1 MiB");
+ TORCHTRT_CHECK(
+ dla_sram_size >= 4096,
+ "DLA managed SRAM size must be at least 4 KiB and must be a power of 2. This defaults to 1 MiB");
info.convert_info.engine_settings.dla_sram_size = dla_sram_size;
- TORCHTRT_CHECK(dla_local_dram_size >= 4096, "DLA Local DRAM size must be at least 4 KiB and must be a power of 2. This defaults to 1 GiB");
+ TORCHTRT_CHECK(
+ dla_local_dram_size >= 4096,
+ "DLA Local DRAM size must be at least 4 KiB and must be a power of 2. This defaults to 1 GiB");
info.convert_info.engine_settings.dla_local_dram_size = dla_local_dram_size;
- TORCHTRT_CHECK(dla_global_dram_size >= 4096, "DLA Global DRAM size must be at least 4 KiB and must be a power of 2. This defaults to 512 MiB");
+ TORCHTRT_CHECK(
+ dla_global_dram_size >= 4096,
+ "DLA Global DRAM size must be at least 4 KiB and must be a power of 2. This defaults to 512 MiB");
info.convert_info.engine_settings.dla_global_dram_size = dla_global_dram_size;
return info;
}
@@ -304,7 +310,7 @@ std::string CompileSpec::stringify() {
}
ss << " \"Enabled Precision\": [";
for (auto p : enabled_precisions) {
- ss << to_str(p) << ", " ;
+ ss << to_str(p) << ", ";
}
ss << "]" << std::endl;
ss << " \"TF32 Disabled\": " << disable_tf32 << std::endl;
diff --git a/workspace/py/torch_tensorrt/csrc/register_tensorrt_classes.cpp b/tmp/changes.txt
index 0eb6fba..274b40d 100644
--- a/workspace/py/torch_tensorrt/csrc/register_tensorrt_classes.cpp
+++ b/tmp/changes.txt
@@ -28,7 +28,8 @@ void RegisterTRTCompileSpec() {
.def(torch::init<>())
.def("__str__", &torch_tensorrt::pyapi::InputSignature::to_str);
- ADD_FIELD_GET_SET_REGISTRATION(TRTInputSignatureTSRegistration, torch_tensorrt::pyapi::InputSignature, signature_ivalue);
+ ADD_FIELD_GET_SET_REGISTRATION(
+ TRTInputSignatureTSRegistration, torch_tensorrt::pyapi::InputSignature, signature_ivalue);
static auto TORCHTRT_UNUSED TRTDeviceTSRegistration =
torch::class_<torch_tensorrt::pyapi::Device>("tensorrt", "_Device")
@@ -73,7 +74,8 @@ void RegisterTRTCompileSpec() {
ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistration, torch_tensorrt::pyapi::CompileSpec, workspace_size);
ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistration, torch_tensorrt::pyapi::CompileSpec, dla_sram_size);
ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistration, torch_tensorrt::pyapi::CompileSpec, dla_local_dram_size);
- ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistration, torch_tensorrt::pyapi::CompileSpec, dla_global_dram_size);
+ ADD_FIELD_GET_SET_REGISTRATION(
+ TRTCompileSpecTSRegistration, torch_tensorrt::pyapi::CompileSpec, dla_global_dram_size);
ADD_FIELD_GET_SET_REGISTRATION(
TRTCompileSpecTSRegistration, torch_tensorrt::pyapi::CompileSpec, truncate_long_and_double);
}
diff --git a/workspace/py/torch_tensorrt/csrc/tensorrt_classes.h b/tmp/changes.txt
index d3b2274..be2fab3 100644
--- a/workspace/py/torch_tensorrt/csrc/tensorrt_classes.h
+++ b/tmp/changes.txt
@@ -58,7 +58,7 @@ struct Input : torch::CustomClassHolder {
};
struct InputSignature : torch::CustomClassHolder {
- torch::jit::IValue signature_ivalue; // nested Input, full input spec
+ torch::jit::IValue signature_ivalue; // nested Input, full input spec
ADD_FIELD_GET_SET(signature_ivalue, torch::jit::IValue);
std::string to_str();
};
diff --git a/workspace/core/compiler.cpp b/tmp/changes.txt
index e44ece5..caee900 100644
--- a/workspace/core/compiler.cpp
+++ b/tmp/changes.txt
@@ -308,70 +308,78 @@ void MapInputsAndDetermineDTypes(
std::shared_ptr<torch::jit::Graph>& g,
ir::StaticParams& static_params,
ir::CollectionTypeMap& first_use_type_map) {
- cfg.convert_info.collection_input_spec_map = std::move(ir::associate_specs_with_collection_inputs(g, cfg.graph_inputs, static_params));
+ cfg.convert_info.collection_input_spec_map =
+ std::move(ir::associate_specs_with_collection_inputs(g, cfg.graph_inputs, static_params));
- auto collection_inputs = ir::get_collection_inputs(g, static_params);
- LOG_DEBUG("In MapInputsAndDetermineDTypes, the g->inputs() size is " << g->inputs().size() << ", CollectionInputSpecMap size is" << collection_inputs.size());
+ auto collection_inputs = ir::get_collection_inputs(g, static_params);
+ LOG_DEBUG(
+ "In MapInputsAndDetermineDTypes, the g->inputs() size is "
+ << g->inputs().size() << ", CollectionInputSpecMap size is" << collection_inputs.size());
- for (auto in : collection_inputs) {
- std::vector<ir::Input>& spec = cfg.convert_info.collection_input_spec_map.find(in)->second;
- std::vector<c10::optional<at::ScalarType>> est_type_opt;
+ for (auto in : collection_inputs) {
+ std::vector<ir::Input>& spec = cfg.convert_info.collection_input_spec_map.find(in)->second;
+ std::vector<c10::optional<at::ScalarType>> est_type_opt;
- auto est_it = first_use_type_map.find(in);
- if (est_it != first_use_type_map.end()) {
- est_type_opt = first_use_type_map.find(in)->second;
- }
- // traverse elements in est_type_out and spec
- for (size_t i = 0; i < est_type_opt.size(); i++) {
- if (est_type_opt[i] && !spec[i].dtype_is_user_defined) {
- // If we can calculate the type from the graph and the type was not defined by the user then use the calculated
- // type
- LOG_INFO(
- "Since input type is not explicitly defined, infering using first tensor calculation\n Inferred input "
- << in->debugName() << " has type " << est_type_opt[i].value());
- spec[i].dtype = util::ScalarTypeToTRTDataType(est_type_opt[i].value());
- } else if (!est_type_opt[i] && !spec[i].dtype_is_user_defined) {
- // If we cannot calculate the type and the user did not define the type, then default to FP32
- LOG_WARNING(
- "Cannot infer input type from calcuations in graph for input "
- << in->debugName() << ". Assuming it is Float32. If not, specify input type explicity");
- spec[i].dtype = nvinfer1::DataType::kFLOAT;
- } else if (spec[i].dtype_is_user_defined && cfg.partition_info.enabled) {
- if (!est_type_opt[i]) {
- LOG_INFO("Cannot infer input tensor dtype in graph, compiler is going to use the user setting");
+ auto est_it = first_use_type_map.find(in);
+ if (est_it != first_use_type_map.end()) {
+ est_type_opt = first_use_type_map.find(in)->second;
+ }
+ // traverse elements in est_type_out and spec
+ for (size_t i = 0; i < est_type_opt.size(); i++) {
+ if (est_type_opt[i] && !spec[i].dtype_is_user_defined) {
+ // If we can calculate the type from the graph and the type was not defined by the user then use the calculated
+ // type
+ LOG_INFO(
+ "Since input type is not explicitly defined, infering using first tensor calculation\n Inferred input "
+ << in->debugName() << " has type " << est_type_opt[i].value());
+ spec[i].dtype = util::ScalarTypeToTRTDataType(est_type_opt[i].value());
+ } else if (!est_type_opt[i] && !spec[i].dtype_is_user_defined) {
+ // If we cannot calculate the type and the user did not define the type, then default to FP32
+ LOG_WARNING(
+ "Cannot infer input type from calcuations in graph for input "
+ << in->debugName() << ". Assuming it is Float32. If not, specify input type explicity");
+ spec[i].dtype = nvinfer1::DataType::kFLOAT;
+ } else if (spec[i].dtype_is_user_defined && cfg.partition_info.enabled) {
+ if (!est_type_opt[i]) {
+ LOG_INFO("Cannot infer input tensor dtype in graph, compiler is going to use the user setting");
+ std::stringstream ss;
+ ss << "For input " << in->debugName() << ", found user specified input dtype as ";
+ ss << cfg.convert_info.collection_input_spec_map.find(in)->second[i].dtype;
+ ss << ". The compiler is going to use the user setting "
+ << cfg.convert_info.collection_input_spec_map.find(in)->second[i].dtype;
+ auto warn_str = ss.str();
+ LOG_WARNING(warn_str);
+ // Overwrite type map with user settings
+ first_use_type_map[in][i] = {
+ util::TRTDataTypeToScalarType(cfg.convert_info.collection_input_spec_map.find(in)->second[i].dtype)};
+
+ } else {
+ if (util::TRTDataTypeToScalarType(cfg.convert_info.collection_input_spec_map.find(in)->second[i].dtype) !=
+ est_type_opt[i].value()) {
std::stringstream ss;
ss << "For input " << in->debugName() << ", found user specified input dtype as ";
ss << cfg.convert_info.collection_input_spec_map.find(in)->second[i].dtype;
- ss << ". The compiler is going to use the user setting " << cfg.convert_info.collection_input_spec_map.find(in)->second[i].dtype;
+ ss << ", however when inspecting the graph, the input type expected was inferred to be ";
+ ss << est_type_opt[i].value() << std::endl;
+ ss << "The compiler is going to use the user setting "
+ << cfg.convert_info.collection_input_spec_map.find(in)->second[i].dtype;
+ ss << "\nThis conflict may cause an error at runtime due to partial compilation being enabled and therefore\n";
+ ss << "compatibility with PyTorch's data type convention is required.\n";
+ ss << "If you do indeed see errors at runtime either:\n";
+ ss << "- Remove the dtype spec for " << in->debugName() << std::endl;
+ ss << "- Disable partial compilation by setting require_full_compilation to True";
auto warn_str = ss.str();
LOG_WARNING(warn_str);
// Overwrite type map with user settings
- first_use_type_map[in][i] = {util::TRTDataTypeToScalarType(cfg.convert_info.collection_input_spec_map.find(in)->second[i].dtype)};
-
- } else {
- if (util::TRTDataTypeToScalarType(cfg.convert_info.collection_input_spec_map.find(in)->second[i].dtype) != est_type_opt[i].value()) {
- std::stringstream ss;
- ss << "For input " << in->debugName() << ", found user specified input dtype as ";
- ss << cfg.convert_info.collection_input_spec_map.find(in)->second[i].dtype;
- ss << ", however when inspecting the graph, the input type expected was inferred to be ";
- ss << est_type_opt[i].value() << std::endl;
- ss << "The compiler is going to use the user setting " << cfg.convert_info.collection_input_spec_map.find(in)->second[i].dtype;
- ss << "\nThis conflict may cause an error at runtime due to partial compilation being enabled and therefore\n";
- ss << "compatibility with PyTorch's data type convention is required.\n";
- ss << "If you do indeed see errors at runtime either:\n";
- ss << "- Remove the dtype spec for " << in->debugName() << std::endl;
- ss << "- Disable partial compilation by setting require_full_compilation to True";
- auto warn_str = ss.str();
- LOG_WARNING(warn_str);
- // Overwrite type map with user settings
- first_use_type_map[in][i] = {util::TRTDataTypeToScalarType(cfg.convert_info.collection_input_spec_map.find(in)->second[i].dtype)};
- }
+ first_use_type_map[in][i] = {
+ util::TRTDataTypeToScalarType(cfg.convert_info.collection_input_spec_map.find(in)->second[i].dtype)};
}
- } else {
- // The user defined the type so no changes are necessary
}
+ } else {
+ // The user defined the type so no changes are necessary
}
}
+ }
// }
}
@@ -425,12 +433,13 @@ torch::jit::Module CompileGraph(const torch::jit::Module& mod, CompileSpec cfg)
if (cfg.partition_info.enabled &&
(!(cfg.lower_info.forced_fallback_modules.size() == 0 &&
- cfg.partition_info.forced_fallback_operators.size() == 0 && isBlockConvertible)
- || outputIsCollection)) {
-
+ cfg.partition_info.forced_fallback_operators.size() == 0 && isBlockConvertible) ||
+ outputIsCollection)) {
std::unordered_map<torch::jit::Node*, int> fallback_nodes;
- auto collection_input_ivalues_map = partitioning::generateRandomInputs(cfg.convert_info.collection_input_spec_map, first_use_types);
- auto graph_and_mapping = ConstructFallbackGraph(new_mod, g->block(), collection_input_ivalues_map, cfg, static_params, fallback_nodes);
+ auto collection_input_ivalues_map =
+ partitioning::generateRandomInputs(cfg.convert_info.collection_input_spec_map, first_use_types);
+ auto graph_and_mapping = ConstructFallbackGraph(
+ new_mod, g->block(), collection_input_ivalues_map, cfg, static_params, fallback_nodes);
new_g = graph_and_mapping.first;
// renaming the input name of graph after fallback to ensure pytorch deserialize it correctly
for (size_t i = 0; i < new_g->inputs().size(); ++i) {
diff --git a/workspace/core/conversion/conversion.cpp b/tmp/changes.txt
index 914f1dd..5f4b20e 100644
--- a/workspace/core/conversion/conversion.cpp
+++ b/tmp/changes.txt
@@ -135,12 +135,10 @@ void AddLayer(ConversionCtx* ctx, const torch::jit::Node* n) {
<< "please report this error to https://www.github.com/NVIDIA/Torch-TensorRT/issues");
}
-void AddInputs(
- ConversionCtx* ctx,
- c10::ArrayRef<const torch::jit::Value*> inputs,
- ConversionInfo& conversion_info) {
+void AddInputs(ConversionCtx* ctx, c10::ArrayRef<const torch::jit::Value*> inputs, ConversionInfo& conversion_info) {
std::unordered_map<const torch::jit::Value*, ir::Input>& input_specs = conversion_info.inputs;
- std::unordered_map<const torch::jit::Value*, std::vector<ir::Input>> collection_input_spec = conversion_info.collection_input_spec_map;
+ std::unordered_map<const torch::jit::Value*, std::vector<ir::Input>> collection_input_spec =
+ conversion_info.collection_input_spec_map;
std::vector<const torch::jit::Value*> input_tensors;
for (auto in : inputs) {
@@ -173,7 +171,7 @@ void AddInputs(
"Cannot find an input spec associated with input: " << in->debugName());
ir::Input spec;
if (input_specs.find(in) != input_specs.end()) {
- spec = input_specs.find(in)->second;
+ spec = input_specs.find(in)->second;
} else {
spec = collection_input_spec.find(in)->second[0]; // assume input is tensor
}
@@ -559,8 +557,9 @@ std::set<std::string> ConvertableOpsInBlock(const torch::jit::Block* b) {
}
bool OutputIsCollection(const torch::jit::Block* b) {
- for (auto out: b->outputs()) {
- if(out->type()->kind() == torch::jit::TypeKind::TupleType || out->type()->kind() == torch::jit::TypeKind::ListType) {
+ for (auto out : b->outputs()) {
+ if (out->type()->kind() == torch::jit::TypeKind::TupleType ||
+ out->type()->kind() == torch::jit::TypeKind::ListType) {
return true;
}
}
diff --git a/workspace/core/conversion/conversionctx/ConversionCtx.cpp b/tmp/changes.txt
index a24a159..71159eb 100644
--- a/workspace/core/conversion/conversionctx/ConversionCtx.cpp
+++ b/tmp/changes.txt
@@ -107,7 +107,7 @@ ConversionCtx::ConversionCtx(BuilderSettings build_settings)
}
cfg->setAvgTimingIterations(settings.num_avg_timing_iters);
- if (settings.workspace_size != 0){
+ if (settings.workspace_size != 0) {
cfg->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kWORKSPACE, settings.workspace_size);
}
@@ -124,13 +124,13 @@ ConversionCtx::ConversionCtx(BuilderSettings build_settings)
settings.enabled_precisions.find(nvinfer1::DataType::kFLOAT) == settings.enabled_precisions.end(),
"DLA supports only fp16 or int8 precision");
cfg->setDLACore(settings.device.dla_core);
- if (settings.dla_sram_size != 1048576){
+ if (settings.dla_sram_size != 1048576) {
cfg->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kDLA_MANAGED_SRAM, settings.dla_sram_size);
}
- if (settings.dla_local_dram_size != 1073741824){
+ if (settings.dla_local_dram_size != 1073741824) {
cfg->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kDLA_LOCAL_DRAM, settings.dla_local_dram_size);
}
- if (settings.dla_global_dram_size != 536870912){
+ if (settings.dla_global_dram_size != 536870912) {
cfg->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kDLA_GLOBAL_DRAM, settings.dla_global_dram_size);
}
}
diff --git a/workspace/core/conversion/converters/converter_util.cpp b/tmp/changes.txt
index a6a2bbd..7452615 100644
--- a/workspace/core/conversion/converters/converter_util.cpp
+++ b/tmp/changes.txt
@@ -207,13 +207,13 @@ nvinfer1::ITensor* clamp(
nvinfer1::ITensor* lower_bound,
nvinfer1::ITensor* upper_bound,
std::string const& name) {
-
auto max_layer = add_elementwise(ctx, nvinfer1::ElementWiseOperation::kMAX, x, lower_bound, "max layer for " + name);
TORCHTRT_CHECK(max_layer, "Unable to create max layer for clamp");
LOG_DEBUG(ctx->logger, "Create " << max_layer->getName() << " for clamp");
auto max_itensor = max_layer->getOutput(0);
- auto min_layer = add_elementwise(ctx, nvinfer1::ElementWiseOperation::kMIN, max_itensor, upper_bound, "min layer for " + name);
+ auto min_layer =
+ add_elementwise(ctx, nvinfer1::ElementWiseOperation::kMIN, max_itensor, upper_bound, "min layer for " + name);
TORCHTRT_CHECK(min_layer, "Unable to create min layer for clamp");
LOG_DEBUG(ctx->logger, "Create " << min_layer->getName() << " for clamp");
auto min_itensor = min_layer->getOutput(0);
@@ -227,13 +227,13 @@ nvinfer1::ITensor* clamp_to_input_dim(
nvinfer1::ITensor* input_dim,
int nbdims,
std::string const& name) {
-
auto zero = torch::zeros({nbdims}).to(torch::kI32);
auto zero_itensor = tensor_to_const(ctx, zero);
auto one = torch::ones({nbdims}).to(torch::kI32);
auto one_itensor = tensor_to_const(ctx, one);
- auto upper_bound_layer = add_elementwise(ctx, nvinfer1::ElementWiseOperation::kSUB, input_dim, one_itensor, "sub layer for " + name);
+ auto upper_bound_layer =
+ add_elementwise(ctx, nvinfer1::ElementWiseOperation::kSUB, input_dim, one_itensor, "sub layer for " + name);
TORCHTRT_CHECK(upper_bound_layer, "Unable to create sub layer for clamp to inputDim");
LOG_DEBUG(ctx->logger, "Create " << upper_bound_layer->getName() << " for clamp to inputDim");
auto upper_bound = upper_bound_layer->getOutput(0);
@@ -243,7 +243,8 @@ nvinfer1::ITensor* clamp_to_input_dim(
LOG_DEBUG(ctx->logger, "Create " << max_layer->getName() << " for clamp to inputDim");
auto max_itensor = max_layer->getOutput(0);
- auto min_layer = add_elementwise(ctx, nvinfer1::ElementWiseOperation::kMIN, max_itensor, upper_bound, "min layer for " + name);
+ auto min_layer =
+ add_elementwise(ctx, nvinfer1::ElementWiseOperation::kMIN, max_itensor, upper_bound, "min layer for " + name);
TORCHTRT_CHECK(min_layer, "Unable to create min_layer for clamp to inputDim");
LOG_DEBUG(ctx->logger, "Create " << min_layer->getName() << " for clamp to inputDim");
auto min_itensor = min_layer->getOutput(0);
@@ -257,7 +258,6 @@ nvinfer1::ITensor* normalize_indices(
nvinfer1::ITensor* indices,
int nbdims,
std::string const& name) {
-
auto zero = torch::zeros({nbdims}).to(torch::kI32);
auto neg = -torch::ones({nbdims}).to(torch::kI32);
auto zero_itensor = tensor_to_const(ctx, zero);
@@ -307,17 +307,20 @@ nvinfer1::ITensor* get_slice_size(
at::Tensor one_tensor = torch::ones({nbdims}).to(torch::kI32);
auto one_itensor = tensor_to_const(ctx, one_tensor);
- auto sub_layer = add_elementwise(ctx, nvinfer1::ElementWiseOperation::kSUB, end, start, "get_slice_size sub layer for " + name);
+ auto sub_layer =
+ add_elementwise(ctx, nvinfer1::ElementWiseOperation::kSUB, end, start, "get_slice_size sub layer for " + name);
TORCHTRT_CHECK(sub_layer, "Unable to create sub layer in calculate_output_size");
LOG_DEBUG(ctx->logger, "Create " << sub_layer->getName() << " for calculate_output_size");
auto sub_itensor = sub_layer->getOutput(0);
- auto div_layer = add_elementwise(ctx, nvinfer1::ElementWiseOperation::kDIV, sub_itensor, stride, "get_slice_size div layer for " + name);
+ auto div_layer = add_elementwise(
+ ctx, nvinfer1::ElementWiseOperation::kDIV, sub_itensor, stride, "get_slice_size div layer for " + name);
TORCHTRT_CHECK(div_layer, "Unable to create div layer in calculate_output_size");
LOG_DEBUG(ctx->logger, "Create " << div_layer->getName() << " for calculate_output_size");
auto div_itensor = div_layer->getOutput(0);
- auto add_layer = add_elementwise(ctx, nvinfer1::ElementWiseOperation::kSUM, div_itensor, one_itensor, "get_slice_size sum layer for " + name);
+ auto add_layer = add_elementwise(
+ ctx, nvinfer1::ElementWiseOperation::kSUM, div_itensor, one_itensor, "get_slice_size sum layer for " + name);
TORCHTRT_CHECK(add_layer, "Unable to create add layer in calculate_output_size");
LOG_DEBUG(ctx->logger, "Create " << add_layer->getName() << " for calculate_output_size");
auto size_itensor = add_layer->getOutput(0);
diff --git a/workspace/core/conversion/converters/impl/select.cpp b/tmp/changes.txt
index 3599ab9..d33f09a 100644
--- a/workspace/core/conversion/converters/impl/select.cpp
+++ b/tmp/changes.txt
@@ -103,121 +103,118 @@ nvinfer1::ITensor* roll(
auto select_registrations TORCHTRT_UNUSED =
RegisterNodeConversionPatterns()
- .pattern(
- {"aten::select.int(Tensor(a) self, int dim, int index) -> (Tensor(a))",
- [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
- auto in = args[0].ITensorOrFreeze(ctx);
- auto maxDim = static_cast<int64_t>(in->getDimensions().nbDims);
- auto dim = args[1].unwrapToInt();
- // Handle negative axis by refering to nbDims of input Tensor
- dim = dim < 0 ? dim + maxDim : dim;
- auto ind = (int32_t)args[2].unwrapToInt();
- // Along the specified dimension, handle negative index by subtracting along length of dimension.
- ind = ind < 0 ? ind + in->getDimensions().d[dim] : ind;
- LOG_DEBUG("Gather input dimensions: " << in->getDimensions());
- LOG_DEBUG("Dimension to select: " << dim);
- LOG_DEBUG("Index: " << ind);
-
- // index to access needs to be an at::Tensor
- at::Tensor indices = torch::tensor({ind}).to(torch::kI32);
- auto const_out = tensor_to_const(ctx, indices);
-
- // IGatherLayer takes in input tensor, the indices, and the axis
- // of input tensor to take indices from
- auto gather_layer = ctx->net->addGather(*in, *const_out, dim);
- TORCHTRT_CHECK(gather_layer, "Unable to create gather layer from node: " << *n);
- auto out = gather_layer->getOutput(0);
+ .pattern({"aten::select.int(Tensor(a) self, int dim, int index) -> (Tensor(a))",
+ [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
+ auto in = args[0].ITensorOrFreeze(ctx);
+ auto maxDim = static_cast<int64_t>(in->getDimensions().nbDims);
+ auto dim = args[1].unwrapToInt();
+ // Handle negative axis by refering to nbDims of input Tensor
+ dim = dim < 0 ? dim + maxDim : dim;
+ auto ind = (int32_t)args[2].unwrapToInt();
+ // Along the specified dimension, handle negative index by subtracting along length of dimension.
+ ind = ind < 0 ? ind + in->getDimensions().d[dim] : ind;
+ LOG_DEBUG("Gather input dimensions: " << in->getDimensions());
+ LOG_DEBUG("Dimension to select: " << dim);
+ LOG_DEBUG("Index: " << ind);
+
+ // index to access needs to be an at::Tensor
+ at::Tensor indices = torch::tensor({ind}).to(torch::kI32);
+ auto const_out = tensor_to_const(ctx, indices);
+
+ // IGatherLayer takes in input tensor, the indices, and the axis
+ // of input tensor to take indices from
+ auto gather_layer = ctx->net->addGather(*in, *const_out, dim);
+ TORCHTRT_CHECK(gather_layer, "Unable to create gather layer from node: " << *n);
+ auto out = gather_layer->getOutput(0);
+
+ LOG_DEBUG("Gather tensor shape: " << out->getDimensions());
+
+ if (out->getDimensions().nbDims != 1) {
+ // IShuffleLayer removes redundant dimensions
+ auto shuffle_layer = ctx->net->addShuffle(*out);
+ TORCHTRT_CHECK(shuffle_layer, "Unable to create shuffle layer from node: " << *n);
+ shuffle_layer->setReshapeDimensions(util::squeezeDims(out->getDimensions(), dim));
+ shuffle_layer->setName(util::node_info(n).c_str());
+ out = shuffle_layer->getOutput(0);
+ }
+
+ out = ctx->AssociateValueAndTensor(n->outputs()[0], out);
+
+ LOG_DEBUG("Output tensor shape: " << out->getDimensions());
- LOG_DEBUG("Gather tensor shape: " << out->getDimensions());
+ return true;
+ }})
+ .pattern({"aten::narrow(Tensor(a) self, int dim, int start, int length) -> Tensor(a)",
+ [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
+ auto in = args[0].ITensor();
+ auto axis = args[1].unwrapToInt();
+ auto start = (int32_t)args[2].unwrapToInt();
+ auto length = (int32_t)args[3].unwrapToInt();
- if (out->getDimensions().nbDims != 1) {
- // IShuffleLayer removes redundant dimensions
- auto shuffle_layer = ctx->net->addShuffle(*out);
- TORCHTRT_CHECK(shuffle_layer, "Unable to create shuffle layer from node: " << *n);
- shuffle_layer->setReshapeDimensions(util::squeezeDims(out->getDimensions(), dim));
- shuffle_layer->setName(util::node_info(n).c_str());
- out = shuffle_layer->getOutput(0);
- }
+ // index to access needs to be an at::Tensor
+ at::Tensor indices = torch::arange(start, start + length, 1).to(torch::kI32);
+ auto weights = Weights(ctx, indices);
- out = ctx->AssociateValueAndTensor(n->outputs()[0], out);
+ // IConstantLayer to convert indices from Weights to ITensor
+ auto const_layer = ctx->net->addConstant(weights.shape, weights.data);
+ TORCHTRT_CHECK(const_layer, "Unable to create constant layer from node: " << *n);
+ auto const_out = const_layer->getOutput(0);
- LOG_DEBUG("Output tensor shape: " << out->getDimensions());
+ // IGatherLayer takes in input tensor, the indices, and the axis
+ // of input tensor to take indices from
+ auto gather_layer = ctx->net->addGather(*in, *const_out, axis);
+ TORCHTRT_CHECK(gather_layer, "Unable to create gather layer from node: " << *n);
+ auto gather_out = gather_layer->getOutput(0);
- return true;
- }})
- .pattern(
- {"aten::narrow(Tensor(a) self, int dim, int start, int length) -> Tensor(a)",
- [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
- auto in = args[0].ITensor();
- auto axis = args[1].unwrapToInt();
- auto start = (int32_t)args[2].unwrapToInt();
- auto length = (int32_t)args[3].unwrapToInt();
-
- // index to access needs to be an at::Tensor
- at::Tensor indices = torch::arange(start, start + length, 1).to(torch::kI32);
- auto weights = Weights(ctx, indices);
-
- // IConstantLayer to convert indices from Weights to ITensor
- auto const_layer = ctx->net->addConstant(weights.shape, weights.data);
- TORCHTRT_CHECK(const_layer, "Unable to create constant layer from node: " << *n);
- auto const_out = const_layer->getOutput(0);
-
- // IGatherLayer takes in input tensor, the indices, and the axis
- // of input tensor to take indices from
- auto gather_layer = ctx->net->addGather(*in, *const_out, axis);
- TORCHTRT_CHECK(gather_layer, "Unable to create gather layer from node: " << *n);
- auto gather_out = gather_layer->getOutput(0);
-
- // IShuffleLayer removes redundant dimensions
- auto shuffle_layer = ctx->net->addShuffle(*gather_out);
- TORCHTRT_CHECK(shuffle_layer, "Unable to create shuffle layer from node: " << *n);
- shuffle_layer->setReshapeDimensions(util::unpadDims(gather_out->getDimensions()));
- shuffle_layer->setName(util::node_info(n).c_str());
- auto shuffle_out = shuffle_layer->getOutput(0);
+ // IShuffleLayer removes redundant dimensions
+ auto shuffle_layer = ctx->net->addShuffle(*gather_out);
+ TORCHTRT_CHECK(shuffle_layer, "Unable to create shuffle layer from node: " << *n);
+ shuffle_layer->setReshapeDimensions(util::unpadDims(gather_out->getDimensions()));
+ shuffle_layer->setName(util::node_info(n).c_str());
+ auto shuffle_out = shuffle_layer->getOutput(0);
- auto out = ctx->AssociateValueAndTensor(n->outputs()[0], shuffle_out);
+ auto out = ctx->AssociateValueAndTensor(n->outputs()[0], shuffle_out);
- LOG_DEBUG("Output tensor shape: " << out->getDimensions());
+ LOG_DEBUG("Output tensor shape: " << out->getDimensions());
- return true;
- }})
- .pattern(
- {"aten::narrow.Tensor(Tensor(a) self, int dim, Tensor start, int length) -> Tensor(a)",
- [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
- auto in = args[0].ITensor();
- auto axis = args[1].unwrapToInt();
- torch::Tensor start = args[2].IValue()->toTensor().to(torch::kI32);
- int32_t startIdx = start.item().to<int32_t>();
- auto length = (int32_t)args[3].unwrapToInt();
-
- // index to access needs to be an at::Tensor
- at::Tensor indices = torch::arange(startIdx, startIdx + length, 1).to(torch::kI32);
- auto weights = Weights(ctx, indices);
-
- // IConstantLayer to convert indices from Weights to ITensor
- auto const_layer = ctx->net->addConstant(weights.shape, weights.data);
- TORCHTRT_CHECK(const_layer, "Unable to create constant layer from node: " << *n);
- auto const_out = const_layer->getOutput(0);
-
- // IGatherLayer takes in input tensor, the indices, and the axis
- // of input tensor to take indices from
- auto gather_layer = ctx->net->addGather(*in, *const_out, axis);
- TORCHTRT_CHECK(gather_layer, "Unable to create gather layer from node: " << *n);
- auto gather_out = gather_layer->getOutput(0);
-
- // IShuffleLayer removes redundant dimensions
- auto shuffle_layer = ctx->net->addShuffle(*gather_out);
- TORCHTRT_CHECK(shuffle_layer, "Unable to create shuffle layer from node: " << *n);
- shuffle_layer->setReshapeDimensions(util::unpadDims(gather_out->getDimensions()));
- shuffle_layer->setName(util::node_info(n).c_str());
- auto shuffle_out = shuffle_layer->getOutput(0);
-
- auto out = ctx->AssociateValueAndTensor(n->outputs()[0], shuffle_out);
-
- LOG_DEBUG("Output tensor shape: " << out->getDimensions());
+ return true;
+ }})
+ .pattern({"aten::narrow.Tensor(Tensor(a) self, int dim, Tensor start, int length) -> Tensor(a)",
+ [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
+ auto in = args[0].ITensor();
+ auto axis = args[1].unwrapToInt();
+ torch::Tensor start = args[2].IValue()->toTensor().to(torch::kI32);
+ int32_t startIdx = start.item().to<int32_t>();
+ auto length = (int32_t)args[3].unwrapToInt();
+
+ // index to access needs to be an at::Tensor
+ at::Tensor indices = torch::arange(startIdx, startIdx + length, 1).to(torch::kI32);
+ auto weights = Weights(ctx, indices);
+
+ // IConstantLayer to convert indices from Weights to ITensor
+ auto const_layer = ctx->net->addConstant(weights.shape, weights.data);
+ TORCHTRT_CHECK(const_layer, "Unable to create constant layer from node: " << *n);
+ auto const_out = const_layer->getOutput(0);
+
+ // IGatherLayer takes in input tensor, the indices, and the axis
+ // of input tensor to take indices from
+ auto gather_layer = ctx->net->addGather(*in, *const_out, axis);
+ TORCHTRT_CHECK(gather_layer, "Unable to create gather layer from node: " << *n);
+ auto gather_out = gather_layer->getOutput(0);
+
+ // IShuffleLayer removes redundant dimensions
+ auto shuffle_layer = ctx->net->addShuffle(*gather_out);
+ TORCHTRT_CHECK(shuffle_layer, "Unable to create shuffle layer from node: " << *n);
+ shuffle_layer->setReshapeDimensions(util::unpadDims(gather_out->getDimensions()));
+ shuffle_layer->setName(util::node_info(n).c_str());
+ auto shuffle_out = shuffle_layer->getOutput(0);
+
+ auto out = ctx->AssociateValueAndTensor(n->outputs()[0], shuffle_out);
+
+ LOG_DEBUG("Output tensor shape: " << out->getDimensions());
- return true;
- }})
+ return true;
+ }})
.pattern(
{"aten::embedding(Tensor weight, Tensor indices, int padding_idx=-1, bool scale_grad_by_freq=False, bool sparse=False) -> (Tensor)",
[](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
@@ -239,30 +236,29 @@ auto select_registrations TORCHTRT_UNUSED =
return true;
}})
- .pattern(
- {"aten::roll(Tensor self, int[1] shifts, int[1] dims=[]) -> (Tensor)",
- [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
- auto in = args[0].ITensor();
- auto shifts = args[1].unwrapToIntList().vec();
- auto dims = args[2].unwrapToIntList().vec();
-
- TORCHTRT_CHECK(dims.size() == shifts.size(), "dims.size() should be equal to shifts.size()");
- if (ctx->input_is_dynamic) {
- TORCHTRT_THROW_ERROR("aten::roll is currently not support in dynamic input shape compilation");
- } else {
- auto in_shape = util::toVec(in->getDimensions());
- for (size_t i = 0; i < dims.size(); i++) {
- auto dim = dims[i] < 0 ? (in_shape.size() + dims[i]) : dims[i];
- TORCHTRT_CHECK(dim < in_shape.size(), "Dimension out of range");
- in = roll(ctx, in, shifts[i], dim, in_shape);
- }
- auto out = ctx->AssociateValueAndTensor(n->outputs()[0], in);
-
- LOG_DEBUG("Output tensor shape: " << out->getDimensions());
-
- return true;
- }
- }})
+ .pattern({"aten::roll(Tensor self, int[1] shifts, int[1] dims=[]) -> (Tensor)",
+ [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
+ auto in = args[0].ITensor();
+ auto shifts = args[1].unwrapToIntList().vec();
+ auto dims = args[2].unwrapToIntList().vec();
+
+ TORCHTRT_CHECK(dims.size() == shifts.size(), "dims.size() should be equal to shifts.size()");
+ if (ctx->input_is_dynamic) {
+ TORCHTRT_THROW_ERROR("aten::roll is currently not support in dynamic input shape compilation");
+ } else {
+ auto in_shape = util::toVec(in->getDimensions());
+ for (size_t i = 0; i < dims.size(); i++) {
+ auto dim = dims[i] < 0 ? (in_shape.size() + dims[i]) : dims[i];
+ TORCHTRT_CHECK(dim < in_shape.size(), "Dimension out of range");
+ in = roll(ctx, in, shifts[i], dim, in_shape);
+ }
+ auto out = ctx->AssociateValueAndTensor(n->outputs()[0], in);
+
+ LOG_DEBUG("Output tensor shape: " << out->getDimensions());
+
+ return true;
+ }
+ }})
.pattern(
{"aten::index.Tensor(Tensor self, Tensor?[] indices) -> (Tensor)",
[](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
@@ -319,7 +315,8 @@ auto select_registrations TORCHTRT_UNUSED =
int startIdx = 0;
auto startIdxIVal = args[2].IValue();
if (!startIdxIVal->isNone()) {
- startIdx = startIdxIVal->toInt() > std::numeric_limits<int32_t>::max() ? maxDim : startIdxIVal->toInt();
+ startIdx =
+ startIdxIVal->toInt() > std::numeric_limits<int32_t>::max() ? maxDim : startIdxIVal->toInt();
startIdx = maxDim == -1 ? startIdx : std::min(startIdx, maxDim);
}
// Handle case when given tensor index is negative
@@ -331,7 +328,8 @@ auto select_registrations TORCHTRT_UNUSED =
int endIdx = maxDim; // -1 for dynamic shape
auto endIdxIVal = args[3].IValue();
if (!endIdxIVal->isNone()) {
- int truncate_value = endIdxIVal->toInt() > std::numeric_limits<int32_t>::max() ? maxDim : endIdxIVal->toInt();
+ int truncate_value =
+ endIdxIVal->toInt() > std::numeric_limits<int32_t>::max() ? maxDim : endIdxIVal->toInt();
endIdx = maxDim == -1 ? truncate_value : std::min(truncate_value, maxDim);
}
if (maxDim > 0) {
@@ -385,7 +383,8 @@ auto select_registrations TORCHTRT_UNUSED =
// update start and end
nvinfer1::ITensor* out_start;
nvinfer1::ITensor* out_end;
- auto start_end = normalize_start_and_end(ctx, ishape_tensor, start_itensor, end_itensor, nbdims, node_name);
+ auto start_end =
+ normalize_start_and_end(ctx, ishape_tensor, start_itensor, end_itensor, nbdims, node_name);
out_start = start_end[0];
out_end = start_end[1];
@@ -397,7 +396,7 @@ auto select_registrations TORCHTRT_UNUSED =
slice_layer->setInput(2, *size_itensor); // size, must be set if input is dynamic
}
auto slice_out = slice_layer->getOutput(0);
-
+
auto out = ctx->AssociateValueAndTensor(n->outputs()[0], slice_out);
LOG_DEBUG("Slice layer output shape: " << out->getDimensions());
diff --git a/workspace/core/partitioning/shape_analysis.cpp b/tmp/changes.txt
index 1221318..8767048 100644
--- a/workspace/core/partitioning/shape_analysis.cpp
+++ b/tmp/changes.txt
@@ -9,31 +9,28 @@ namespace core {
namespace partitioning {
at::Tensor generateSingleInput(ir::Input& input, c10::optional<at::ScalarType>& type_opt) {
- auto cur_shape = input.input_shape;
- std::vector<int64_t> shape;
- shape.insert(shape.begin(), std::begin(cur_shape.d), std::begin(cur_shape.d) + cur_shape.nbDims);
- // auto type_opt = types[input.first][i];
- auto type = at::kFloat;
- if (type_opt) {
- type = type_opt.value();
- } else {
- LOG_WARNING("Input type for doing shape analysis could not be determined, defaulting to F32");
- }
- auto in = at::randint(5, shape, {at::kCUDA}).to(type);
- // ivalue_map[input.first] = in.clone();
- return in;
+ auto cur_shape = input.input_shape;
+ std::vector<int64_t> shape;
+ shape.insert(shape.begin(), std::begin(cur_shape.d), std::begin(cur_shape.d) + cur_shape.nbDims);
+ // auto type_opt = types[input.first][i];
+ auto type = at::kFloat;
+ if (type_opt) {
+ type = type_opt.value();
+ } else {
+ LOG_WARNING("Input type for doing shape analysis could not be determined, defaulting to F32");
+ }
+ auto in = at::randint(5, shape, {at::kCUDA}).to(type);
+ // ivalue_map[input.first] = in.clone();
+ return in;
}
std::unordered_map<const torch::jit::Value*, torch::jit::IValue> generateRandomInputs(
std::unordered_map<const torch::jit::Value*, std::vector<ir::Input>>& inputs,
std::unordered_map<const torch::jit::Value*, std::vector<c10::optional<at::ScalarType>>>& types) {
-
// generate random inputs for running pytorch segments
std::unordered_map<const torch::jit::Value*, torch::jit::IValue> ivalue_map;
-
for (auto& input : inputs) {
-
if (input.first->type()->kind() == torch::jit::TypeKind::ListType) {
// create list
std::vector<torch::jit::IValue> list;
@@ -56,7 +53,6 @@ std::unordered_map<const torch::jit::Value*, torch::jit::IValue> generateRandomI
} else {
auto in = generateSingleInput(input.second[0], types[input.first][0]);
ivalue_map[input.first] = in.clone();
-
}
}
return ivalue_map;
@@ -109,7 +105,8 @@ void getSegmentsOutputByRunning(
jit_inputs_ivalues.push_back(ivalues_maps[input].toBool());
} else if (input->type()->kind() == torch::jit::TypeKind::ListType) {
// create list
- jit_inputs_ivalues.push_back(ivalues_maps[input].toList());;
+ jit_inputs_ivalues.push_back(ivalues_maps[input].toList());
+ ;
} else if (input->type()->kind() == torch::jit::TypeKind::TupleType) {
// create tuple
jit_inputs_ivalues.push_back(ivalues_maps[input].toTuple());
diff --git a/workspace/core/partitioning/partitioning.cpp b/tmp/changes.txt
index 1a7a477..a65a2b9 100644
--- a/workspace/core/partitioning/partitioning.cpp
+++ b/tmp/changes.txt
@@ -22,7 +22,8 @@ inline bool isTensor(torch::jit::Value* val) {
}
inline bool isListOrTuple(torch::jit::Value* val) {
- return val->type()->kind() == torch::jit::TypeKind::TupleType || val->type()->kind() == torch::jit::TypeKind::ListType;
+ return val->type()->kind() == torch::jit::TypeKind::TupleType ||
+ val->type()->kind() == torch::jit::TypeKind::ListType;
}
bool containNonTensorOutputs(torch::jit::Node* n) {
@@ -124,7 +125,8 @@ void find_all_fallback_nodes(
if (!isTensor(output) && !isListOrTuple(output)) {
for (auto use : output->uses()) {
auto node = use.user;
- if (node->kind() != torch::jit::prim::Constant && global_fallback_nodes.insert({node, FallbackNodeType::kNON_TENSOR}).second) {
+ if (node->kind() != torch::jit::prim::Constant &&
+ global_fallback_nodes.insert({node, FallbackNodeType::kNON_TENSOR}).second) {
q.push(node);
}
}
diff --git a/workspace/core/ir/GraphInputs.cpp b/tmp/changes.txt
index 007a727..a1b1196 100644
--- a/workspace/core/ir/GraphInputs.cpp
+++ b/tmp/changes.txt
@@ -5,70 +5,74 @@ namespace torch_tensorrt {
namespace core {
namespace ir {
-void flatten_dfs(std::vector<torch_tensorrt::core::ir::Input>& flattened_inputs, std::vector<std::vector<torch_tensorrt::core::ir::Input>>& collection_inputs,
- torch::jit::IValue input_ivalue, int level, int index) {
- if (input_ivalue.isTuple()) {
- auto input_tuple = input_ivalue.toTuple();
- int idx = 0;
- if (level == 0) {
- collection_inputs.resize(input_tuple->elements().size());
- }
- for (auto item: input_tuple->elements()) {
- torch::jit::IValue converted_item;
- int cur_idx = level < 1 ? idx: index;
- flatten_dfs(flattened_inputs, collection_inputs, item, level+1, cur_idx);
- idx++;
- }
- } else if(input_ivalue.isList()) {
- auto input_list = input_ivalue.toList().vec();
- if (level == 0) {
- collection_inputs.resize(input_list.size());
- }
- c10::TypePtr type = input_list[0].type();
- auto converted_elements = c10::impl::GenericList(type);
- int idx = 0;
- for (auto item: input_list) {
- int cur_idx = level < 1 ? idx: index;
- flatten_dfs(flattened_inputs, collection_inputs, item, level+1, cur_idx);
- idx++;
- }
- } else if(input_ivalue.isCustomClass()) {
- torch_tensorrt::core::ir::Input cur_input = *(input_ivalue.toCustomClass<torch_tensorrt::core::ir::Input>());
- flattened_inputs.push_back(cur_input);
- if (level == 0) { // a single value like A
- collection_inputs.resize(1);
- collection_inputs[0].push_back(cur_input);
- } else if (level == 1) { // like A in [A, A] or [(B, B), A]
- collection_inputs[index].push_back(cur_input);
- } else if (level == 2) { // like A in [(A, A), C]
- collection_inputs[index].push_back(cur_input);
- } else {// only support 2 level
- LOG_ERROR("Input nesting depth exceeds currently supported depth (3), use 1 level: [A, B], or 2 level: [A, (B, C)]");
- }
+void flatten_dfs(
+ std::vector<torch_tensorrt::core::ir::Input>& flattened_inputs,
+ std::vector<std::vector<torch_tensorrt::core::ir::Input>>& collection_inputs,
+ torch::jit::IValue input_ivalue,
+ int level,
+ int index) {
+ if (input_ivalue.isTuple()) {
+ auto input_tuple = input_ivalue.toTuple();
+ int idx = 0;
+ if (level == 0) {
+ collection_inputs.resize(input_tuple->elements().size());
}
+ for (auto item : input_tuple->elements()) {
+ torch::jit::IValue converted_item;
+ int cur_idx = level < 1 ? idx : index;
+ flatten_dfs(flattened_inputs, collection_inputs, item, level + 1, cur_idx);
+ idx++;
+ }
+ } else if (input_ivalue.isList()) {
+ auto input_list = input_ivalue.toList().vec();
+ if (level == 0) {
+ collection_inputs.resize(input_list.size());
+ }
+ c10::TypePtr type = input_list[0].type();
+ auto converted_elements = c10::impl::GenericList(type);
+ int idx = 0;
+ for (auto item : input_list) {
+ int cur_idx = level < 1 ? idx : index;
+ flatten_dfs(flattened_inputs, collection_inputs, item, level + 1, cur_idx);
+ idx++;
+ }
+ } else if (input_ivalue.isCustomClass()) {
+ torch_tensorrt::core::ir::Input cur_input = *(input_ivalue.toCustomClass<torch_tensorrt::core::ir::Input>());
+ flattened_inputs.push_back(cur_input);
+ if (level == 0) { // a single value like A
+ collection_inputs.resize(1);
+ collection_inputs[0].push_back(cur_input);
+ } else if (level == 1) { // like A in [A, A] or [(B, B), A]
+ collection_inputs[index].push_back(cur_input);
+ } else if (level == 2) { // like A in [(A, A), C]
+ collection_inputs[index].push_back(cur_input);
+ } else { // only support 2 level
+ LOG_ERROR(
+ "Input nesting depth exceeds currently supported depth (3), use 1 level: [A, B], or 2 level: [A, (B, C)]");
+ }
+ }
}
-
GraphInputs::GraphInputs(std::vector<ir::Input> inputs_) {
- LOG_DEBUG("Construct GraphInput with ir::Input");
- inputs = inputs_;
- collection_inputs.resize(inputs_.size());
- for (size_t i = 0; i < inputs_.size(); i++) {
- collection_inputs[i].push_back(inputs_[i]);
- }
+ LOG_DEBUG("Construct GraphInput with ir::Input");
+ inputs = inputs_;
+ collection_inputs.resize(inputs_.size());
+ for (size_t i = 0; i < inputs_.size(); i++) {
+ collection_inputs[i].push_back(inputs_[i]);
+ }
}
GraphInputs::GraphInputs(torch::jit::IValue& input_signature_) {
- LOG_DEBUG("Construct GraphInput with IValue");
+ LOG_DEBUG("Construct GraphInput with IValue");
- std::vector<torch_tensorrt::core::ir::Input> flattened_inputs;
- std::vector<std::vector<torch_tensorrt::core::ir::Input>> collection_inputs_;
+ std::vector<torch_tensorrt::core::ir::Input> flattened_inputs;
+ std::vector<std::vector<torch_tensorrt::core::ir::Input>> collection_inputs_;
- flatten_dfs(flattened_inputs, collection_inputs_, input_signature_, 0, 0);
- inputs = flattened_inputs;
- input_signature = input_signature_;
- collection_inputs = collection_inputs_;
- LOG_DEBUG("Collection Input Size: " << collection_inputs_.size());
+ flatten_dfs(flattened_inputs, collection_inputs_, input_signature_, 0, 0);
+ inputs = flattened_inputs;
+ input_signature = input_signature_;
+ collection_inputs = collection_inputs_;
+ LOG_DEBUG("Collection Input Size: " << collection_inputs_.size());
}
} // namespace ir
diff --git a/workspace/core/ir/StaticParams.cpp b/tmp/changes.txt
index 0073ad2..8502c80 100644
--- a/workspace/core/ir/StaticParams.cpp
+++ b/tmp/changes.txt
@@ -12,8 +12,7 @@ StaticParams get_static_params(c10::ArrayRef<torch::jit::Value*> inputs, std::ve
auto param_it = params.begin();
for (auto in : inputs) {
// handle TensorType, TupleType and ListType
- if (in->type() != c10::TensorType::get() &&
- in->type()->kind() != torch::jit::TypeKind::TupleType &&
+ if (in->type() != c10::TensorType::get() && in->type()->kind() != torch::jit::TypeKind::TupleType &&
in->type()->kind() != torch::jit::TypeKind::ListType && param_it != params.end()) {
static_params[in] = *param_it;
++param_it;
diff --git a/workspace/core/ir/ir.cpp b/tmp/changes.txt
index cc82fe0..d9b021e 100644
--- a/workspace/core/ir/ir.cpp
+++ b/tmp/changes.txt
@@ -35,7 +35,9 @@ InputSpecMap pair_input_vals_with_specs(std::vector<const torch::jit::Value*> va
return a;
}
-CollectionInputSpecMap pair_input_vals_with_specs_collection(std::vector<const torch::jit::Value*> vals, std::vector<std::vector<Input>>& specs) {
+CollectionInputSpecMap pair_input_vals_with_specs_collection(
+ std::vector<const torch::jit::Value*> vals,
+ std::vector<std::vector<Input>>& specs) {
TORCHTRT_CHECK(
vals.size() == specs.size(),
"Expected dimension specifications for all input tensors"
@@ -64,7 +66,7 @@ std::vector<const torch::jit::Value*> get_tensor_inputs(
// input.1:Tensor -> used
if (in->type()->isSubtypeOf(c10::TensorType::get()) && static_params.find(in) == static_params.end()) {
input_tensors.push_back(in);
- }
+ }
}
return input_tensors;
}
@@ -80,7 +82,8 @@ std::vector<const torch::jit::Value*> get_collection_inputs(
if (in->type()->isSubtypeOf(c10::TensorType::get()) && static_params.find(in) == static_params.end()) {
input_tensors.push_back(in);
} else if (in->type()->kind() == torch::jit::TypeKind::TupleType && static_params.find(in) == static_params.end()) {
- // } else if (in->type()->isSubtypeOf(c10::TupleType::create()) && static_params.find(in) == static_params.end()) {
+ // } else if (in->type()->isSubtypeOf(c10::TupleType::create()) && static_params.find(in) == static_params.end())
+ // {
input_tensors.push_back(in); // push original tuple
at::ArrayRef<torch::jit::Value*> unpack_tuple = torch::jit::createTupleUnpack(in);
LOG_DEBUG("get_collection_inputs, tuple size " << unpack_tuple.size());
@@ -190,15 +193,15 @@ TypeMap get_block_first_calc_dtypes_opt(torch::jit::Block* b) {
if (i->type() == c10::TensorType::get()) {
torch::jit::Value* in = i;
types.insert({in, get_value_first_calc_dtype_opt(b, i)});
- } else if(i->type()->cast<c10::TupleType>()) {
+ } else if (i->type()->cast<c10::TupleType>()) {
// make sure very time get the same ptr
at::ArrayRef<torch::jit::Value*> unpack_tuple = torch::jit::createTupleUnpack(i);
LOG_DEBUG("Tuple size " << unpack_tuple.size());
- for (auto item: unpack_tuple) {
+ for (auto item : unpack_tuple) {
torch::jit::Value* in = item;
types.insert({in, get_value_first_calc_dtype_opt(b, i)});
}
- } else if(i->type()->isSubtypeOf(c10::ListType::ofTensors())) {
+ } else if (i->type()->isSubtypeOf(c10::ListType::ofTensors())) {
LOG_INFO("Unsupported type of c10::ListType::ofTensors()");
}
}
@@ -212,7 +215,7 @@ CollectionTypeMap get_block_first_calc_dtypes_opt_collection(torch::jit::Block*
torch::jit::Value* in = i;
types.insert({in, {get_value_first_calc_dtype_opt(b, i)}});
- } else if(i->type()->kind() == torch::jit::TypeKind::TupleType) {
+ } else if (i->type()->kind() == torch::jit::TypeKind::TupleType) {
// TODO: to evaluate the data type of tuple element
// make sure very time get the same ptr
// c10::optional<at::ScalarType> tp = get_value_first_calc_dtype_opt(b, i);
@@ -220,9 +223,9 @@ CollectionTypeMap get_block_first_calc_dtypes_opt_collection(torch::jit::Block*
// TODO: calculate the tuple element type, currently we use {} as default datatype
// std::vector<c10::optional<at::ScalarType>> dytpes(unpack_tuple.size(), tp);
std::vector<c10::optional<at::ScalarType>> dytpes(unpack_tuple.size());
- types.insert({i, dytpes}); // insert an empty
+ types.insert({i, dytpes}); // insert an empty
- } else if(i->type()->kind() == torch::jit::TypeKind::ListType) {
+ } else if (i->type()->kind() == torch::jit::TypeKind::ListType) {
// TODO: to decide the size of list and type of list element
LOG_DEBUG("get_block_first_calc_dtypes_opt ListType: use size " << i->uses().size());
c10::optional<at::ScalarType> tp = get_value_first_calc_dtype_opt(b, i);
@@ -234,8 +237,7 @@ CollectionTypeMap get_block_first_calc_dtypes_opt_collection(torch::jit::Block*
return types;
}
-static auto core_input_container =
- torch::class_<Input>("_torch_tensorrt_core_ir", "Input").def(torch::init<>());
+static auto core_input_container = torch::class_<Input>("_torch_tensorrt_core_ir", "Input").def(torch::init<>());
} // namespace ir
} // namespace core
diff --git a/workspace/core/conversion/converters/converter_util.h b/tmp/changes.txt
index cdf2ee5..b155499 100644
--- a/workspace/core/conversion/converters/converter_util.h
+++ b/tmp/changes.txt
@@ -1,8 +1,8 @@
#pragma once
+#include <limits>
#include <map>
#include <string>
-#include <limits>
#include "core/conversion/conversionctx/ConversionCtx.h"
#include "core/conversion/converters/Weights.h"
diff --git a/workspace/core/partitioning/shape_analysis.h b/tmp/changes.txt
index 2654699..e9c51fc 100644
--- a/workspace/core/partitioning/shape_analysis.h
+++ b/tmp/changes.txt
@@ -6,7 +6,6 @@ namespace torch_tensorrt {
namespace core {
namespace partitioning {
-
std::unordered_map<const torch::jit::Value*, torch::jit::IValue> generateRandomInputs(
std::unordered_map<const torch::jit::Value*, std::vector<ir::Input>>& input_ranges,
std::unordered_map<const torch::jit::Value*, std::vector<c10::optional<at::ScalarType>>>& input_types);
diff --git a/workspace/core/ir/ir.h b/tmp/changes.txt
index 966c747..a5225da 100644
--- a/workspace/core/ir/ir.h
+++ b/tmp/changes.txt
@@ -12,7 +12,7 @@ namespace core {
namespace ir {
struct Input : torch::CustomClassHolder {
- Input() {};
+ Input(){};
Input(
std::vector<int64_t> shape,
nvinfer1::DataType dtype = nvinfer1::DataType::kFLOAT,
@@ -42,8 +42,8 @@ struct Input : torch::CustomClassHolder {
struct GraphInputs {
GraphInputs(std::vector<ir::Input> inputs);
GraphInputs(torch::jit::IValue& input_signature);
- torch::jit::IValue input_signature; // nested Input, full input spec
- std::vector<Input> inputs; // flattend Input
+ torch::jit::IValue input_signature; // nested Input, full input spec
+ std::vector<Input> inputs; // flattend Input
std::vector<std::vector<Input>> collection_inputs; // only support two layer nesting, e.g. ((a, b), [c, d], e)
};
@@ -67,7 +67,9 @@ CollectionInputSpecMap associate_specs_with_collection_inputs(
ir::GraphInputs graph_inputs,
StaticParams& static_params);
InputSpecMap pair_input_vals_with_specs(std::vector<const torch::jit::Value*> vals, std::vector<Input> specs);
-CollectionInputSpecMap pair_input_vals_with_specs_collection(std::vector<const torch::jit::Value*> vals, std::vector<std::vector<Input>>& specs);
+CollectionInputSpecMap pair_input_vals_with_specs_collection(
+ std::vector<const torch::jit::Value*> vals,
+ std::vector<std::vector<Input>>& specs);
std::vector<const torch::jit::Value*> get_tensor_inputs(
std::shared_ptr<torch::jit::Graph>& g,
StaticParams& static_params);
diff --git a/workspace/tests/core/conversion/converters/test_cast.cpp b/tmp/changes.txt
index 092cdb3..d26c7a0 100644
--- a/workspace/tests/core/conversion/converters/test_cast.cpp
+++ b/tmp/changes.txt
@@ -135,7 +135,6 @@ TEST(Converters, ATenBoolToINT32TensorConvertsCorrectly) {
ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt, 2e-6));
}
-
TEST(Converters, ATenToSingleConvertsCorrectly) {
const auto graph = R"IR(
graph(%y.1 : Tensor):
@@ -164,7 +163,6 @@ TEST(Converters, ATenToSingleConvertsCorrectly) {
ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt, 2e-6));
}
-
TEST(Converters, ATenTypeAsConvertsCorrectly) {
const auto graph = R"IR(
graph(%0 : Tensor,
diff --git a/workspace/tests/cpp/test_example_tensors.cpp b/tmp/changes.txt
index 3ec8831..256e6f1 100644
--- a/workspace/tests/cpp/test_example_tensors.cpp
+++ b/tmp/changes.txt
@@ -9,7 +9,6 @@ TEST_P(CppAPITests, InputsFromTensors) {
trt_inputs_ivalues.push_back(in.clone());
}
-
auto inputs = std::vector<torch_tensorrt::Input>{trt_inputs_ivalues[0].toTensor()};
auto spec = torch_tensorrt::ts::CompileSpec(inputs);
diff --git a/workspace/tests/cpp/test_collections.cpp b/tmp/changes.txt
index df2280b..829e82a 100644
--- a/workspace/tests/cpp/test_collections.cpp
+++ b/tmp/changes.txt
@@ -5,9 +5,7 @@
#include "torch/script.h"
#include "torch_tensorrt/torch_tensorrt.h"
-
TEST(CppAPITests, TestCollectionStandardTensorInput) {
-
std::string path = "tests/modules/standard_tensor_input_scripted.jit.pt";
torch::Tensor in0 = torch::randn({1, 3, 512, 512}, torch::kCUDA).to(torch::kHalf);
std::vector<at::Tensor> inputs;
@@ -24,7 +22,6 @@ TEST(CppAPITests, TestCollectionStandardTensorInput) {
mod.eval();
mod.to(torch::kCUDA);
-
std::vector<torch::jit::IValue> inputs_;
for (auto in : inputs) {
@@ -52,7 +49,6 @@ TEST(CppAPITests, TestCollectionStandardTensorInput) {
}
TEST(CppAPITests, TestCollectionTupleInput) {
-
std::string path = "tests/modules/tuple_input_scripted.jit.pt";
torch::Tensor in0 = torch::randn({1, 3, 512, 512}, torch::kCUDA).to(torch::kHalf);
@@ -78,14 +74,12 @@ TEST(CppAPITests, TestCollectionTupleInput) {
auto input_shape_ivalue = torch::jit::IValue(std::move(c10::make_intrusive<torch_tensorrt::Input>(input_shape)));
-
std::tuple<torch::jit::IValue, torch::jit::IValue> input_shape_tuple(input_shape_ivalue, input_shape_ivalue);
torch::jit::IValue complex_input_shape(input_shape_tuple);
std::tuple<torch::jit::IValue> input_tuple2(complex_input_shape);
torch::jit::IValue complex_input_shape2(input_tuple2);
-
auto compile_settings = torch_tensorrt::ts::CompileSpec(complex_input_shape2);
compile_settings.require_full_compilation = false;
compile_settings.min_block_size = 3;
@@ -100,9 +94,7 @@ TEST(CppAPITests, TestCollectionTupleInput) {
ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(out.toTensor(), trt_out.toTensor(), 1e-5));
}
-
TEST(CppAPITests, TestCollectionListInput) {
-
std::string path = "tests/modules/list_input_scripted.jit.pt";
torch::Tensor in0 = torch::randn({1, 3, 512, 512}, torch::kCUDA).to(torch::kHalf);
std::vector<at::Tensor> inputs;
@@ -118,7 +110,6 @@ TEST(CppAPITests, TestCollectionListInput) {
mod.eval();
mod.to(torch::kCUDA);
-
std::vector<torch::jit::IValue> inputs_;
for (auto in : inputs) {
@@ -134,7 +125,6 @@ TEST(CppAPITests, TestCollectionListInput) {
complex_inputs.push_back(input_list_ivalue);
-
auto out = mod.forward(complex_inputs);
LOG_DEBUG("Finish torchscirpt forward");
@@ -146,7 +136,6 @@ TEST(CppAPITests, TestCollectionListInput) {
list.push_back(input_shape_ivalue);
list.push_back(input_shape_ivalue);
-
torch::jit::IValue complex_input_shape(list);
std::tuple<torch::jit::IValue> input_tuple2(complex_input_shape);
torch::jit::IValue complex_input_shape2(input_tuple2);
@@ -166,9 +155,7 @@ TEST(CppAPITests, TestCollectionListInput) {
ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(out.toTensor(), trt_out.toTensor(), 1e-5));
}
-
TEST(CppAPITests, TestCollectionTupleInputOutput) {
-
std::string path = "tests/modules/tuple_input_output_scripted.jit.pt";
torch::Tensor in0 = torch::randn({1, 3, 512, 512}, torch::kCUDA).to(torch::kHalf);
@@ -183,7 +170,6 @@ TEST(CppAPITests, TestCollectionTupleInputOutput) {
mod.eval();
mod.to(torch::kCUDA);
-
std::vector<torch::jit::IValue> complex_inputs, complex_inputs_list;
std::tuple<torch::jit::IValue, torch::jit::IValue> input_tuple(in0, in0);
@@ -196,7 +182,6 @@ TEST(CppAPITests, TestCollectionTupleInputOutput) {
auto input_shape_ivalue = torch::jit::IValue(std::move(c10::make_intrusive<torch_tensorrt::Input>(input_shape)));
-
std::tuple<torch::jit::IValue, torch::jit::IValue> input_shape_tuple(input_shape_ivalue, input_shape_ivalue);
torch::jit::IValue complex_input_shape(input_shape_tuple);
@@ -217,13 +202,13 @@ TEST(CppAPITests, TestCollectionTupleInputOutput) {
LOG_DEBUG("Finish compile");
auto trt_out = trt_mod.forward(complex_inputs);
- ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(out.toTuple()->elements()[0].toTensor(), trt_out.toTuple()->elements()[0].toTensor(), 1e-5));
- ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(out.toTuple()->elements()[1].toTensor(), trt_out.toTuple()->elements()[1].toTensor(), 1e-5));
+ ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(
+ out.toTuple()->elements()[0].toTensor(), trt_out.toTuple()->elements()[0].toTensor(), 1e-5));
+ ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(
+ out.toTuple()->elements()[1].toTensor(), trt_out.toTuple()->elements()[1].toTensor(), 1e-5));
}
-
TEST(CppAPITests, TestCollectionListInputOutput) {
-
std::string path = "tests/modules/list_input_output_scripted.jit.pt";
torch::Tensor in0 = torch::randn({1, 3, 512, 512}, torch::kCUDA).to(torch::kHalf);
std::vector<at::Tensor> inputs;
@@ -239,7 +224,6 @@ TEST(CppAPITests, TestCollectionListInputOutput) {
mod.eval();
mod.to(torch::kCUDA);
-
std::vector<torch::jit::IValue> inputs_;
for (auto in : inputs) {
@@ -255,7 +239,6 @@ TEST(CppAPITests, TestCollectionListInputOutput) {
complex_inputs.push_back(input_list_ivalue);
-
auto out = mod.forward(complex_inputs);
LOG_DEBUG("Finish torchscirpt forward");
@@ -263,13 +246,11 @@ TEST(CppAPITests, TestCollectionListInputOutput) {
auto input_shape_ivalue = torch::jit::IValue(std::move(c10::make_intrusive<torch_tensorrt::Input>(input_shape)));
-
c10::TypePtr elementType = input_shape_ivalue.type();
auto list = c10::impl::GenericList(elementType);
list.push_back(input_shape_ivalue);
list.push_back(input_shape_ivalue);
-
torch::jit::IValue complex_input_shape(list);
std::tuple<torch::jit::IValue> input_tuple2(complex_input_shape);
torch::jit::IValue complex_input_shape2(input_tuple2);
@@ -288,13 +269,13 @@ TEST(CppAPITests, TestCollectionListInputOutput) {
LOG_DEBUG("Finish compile");
auto trt_out = trt_mod.forward(complex_inputs);
- ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(out.toList().vec()[0].toTensor(), trt_out.toList().vec()[0].toTensor(), 1e-5));
- ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(out.toList().vec()[1].toTensor(), trt_out.toList().vec()[1].toTensor(), 1e-5));
+ ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(
+ out.toList().vec()[0].toTensor(), trt_out.toList().vec()[0].toTensor(), 1e-5));
+ ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(
+ out.toList().vec()[1].toTensor(), trt_out.toList().vec()[1].toTensor(), 1e-5));
}
-
TEST(CppAPITests, TestCollectionComplexModel) {
-
std::string path = "tests/modules/list_input_tuple_output_scripted.jit.pt";
torch::Tensor in0 = torch::randn({1, 3, 512, 512}, torch::kCUDA).to(torch::kHalf);
std::vector<at::Tensor> inputs;
@@ -310,7 +291,6 @@ TEST(CppAPITests, TestCollectionComplexModel) {
mod.eval();
mod.to(torch::kCUDA);
-
std::vector<torch::jit::IValue> inputs_;
for (auto in : inputs) {
@@ -326,7 +306,6 @@ TEST(CppAPITests, TestCollectionComplexModel) {
complex_inputs.push_back(input_list_ivalue);
-
auto out = mod.forward(complex_inputs);
LOG_DEBUG("Finish torchscirpt forward");
@@ -339,7 +318,6 @@ TEST(CppAPITests, TestCollectionComplexModel) {
list.push_back(input_shape_ivalue);
list.push_back(input_shape_ivalue);
-
torch::jit::IValue complex_input_shape(list);
std::tuple<torch::jit::IValue> input_tuple2(complex_input_shape);
torch::jit::IValue complex_input_shape2(input_tuple2);
@@ -358,6 +336,8 @@ TEST(CppAPITests, TestCollectionComplexModel) {
LOG_DEBUG("Finish compile");
auto trt_out = trt_mod.forward(complex_inputs);
- ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(out.toTuple()->elements()[0].toTensor(), trt_out.toTuple()->elements()[0].toTensor(), 1e-5));
- ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(out.toTuple()->elements()[1].toTensor(), trt_out.toTuple()->elements()[1].toTensor(), 1e-5));
+ ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(
+ out.toTuple()->elements()[0].toTensor(), trt_out.toTuple()->elements()[0].toTensor(), 1e-5));
+ ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(
+ out.toTuple()->elements()[1].toTensor(), trt_out.toTuple()->elements()[1].toTensor(), 1e-5));
}
\ No newline at end of file
diff --git a/workspace/cpp/bin/torchtrtc/main.cpp b/tmp/changes.txt
index 6c207d7..51ec2c5 100644
--- a/workspace/cpp/bin/torchtrtc/main.cpp
+++ b/tmp/changes.txt
@@ -117,8 +117,7 @@ int main(int argc, char** argv) {
parser, "num_iters", "Number of averaging timing iterations used to select kernels", {"num-avg-timing-iters"});
args::ValueFlag<uint64_t> workspace_size(
parser, "workspace_size", "Maximum size of workspace given to TensorRT", {"workspace-size"});
- args::ValueFlag<uint64_t> dla_sram_size(
- parser, "dla_sram_size", "DLA managed SRAM size", {"dla-sram-size"});
+ args::ValueFlag<uint64_t> dla_sram_size(parser, "dla_sram_size", "DLA managed SRAM size", {"dla-sram-size"});
args::ValueFlag<uint64_t> dla_local_dram_size(
parser, "dla_local_dram_size", "DLA Local DRAM size", {"dla-local-dram-size"});
args::ValueFlag<uint64_t> dla_global_dram_size(
diff --git a/workspace/cpp/src/compile_spec.cpp b/tmp/changes.txt
index 1fb4c56..432b070 100644
--- a/workspace/cpp/src/compile_spec.cpp
+++ b/tmp/changes.txt
@@ -29,40 +29,38 @@ CompileSpec::CompileSpec(std::vector<std::vector<int64_t>> fixed_sizes) {
}
CompileSpec::CompileSpec(std::vector<Input> inputs) {
- graph_inputs.inputs = std::move(inputs);
+ graph_inputs.inputs = std::move(inputs);
}
CompileSpec::CompileSpec(torch::jit::IValue input_signature) {
- graph_inputs.input_signature = input_signature;
+ graph_inputs.input_signature = input_signature;
}
-
-
void to_internal_input_signature(torch::jit::IValue input_ivalue, torch::jit::IValue& converted_ivalue) {
- if (input_ivalue.isTuple()) {
- auto input_tuple = input_ivalue.toTuple();
- std::vector<torch::jit::IValue> converted_elements;
- for (auto item: input_tuple->elements()) {
- torch::jit::IValue converted_item;
- to_internal_input_signature(item, converted_item);
- converted_elements.push_back(converted_item);
- auto tuple_ptr = c10::ivalue::Tuple::create(converted_elements);
- converted_ivalue = torch::jit::IValue(tuple_ptr);
- }
- } else if(input_ivalue.isList()) {
- auto input_list = input_ivalue.toList().vec();
- c10::TypePtr type = input_list[0].type();
- auto converted_elements = c10::impl::GenericList(type);
- for (auto item: input_list) {
- torch::jit::IValue converted_item;
- to_internal_input_signature(item, converted_item);
- converted_elements.push_back(converted_item);
- }
- converted_ivalue = torch::jit::IValue(converted_elements);
- } else if(input_ivalue.isCustomClass()) {
- torchtrt::core::ir::Input cur_input = to_internal_input(*(input_ivalue.toCustomClass<torchtrt::Input>()));
- converted_ivalue = torch::jit::IValue(std::move(c10::make_intrusive<torch_tensorrt::core::ir::Input>(cur_input)));
+ if (input_ivalue.isTuple()) {
+ auto input_tuple = input_ivalue.toTuple();
+ std::vector<torch::jit::IValue> converted_elements;
+ for (auto item : input_tuple->elements()) {
+ torch::jit::IValue converted_item;
+ to_internal_input_signature(item, converted_item);
+ converted_elements.push_back(converted_item);
+ auto tuple_ptr = c10::ivalue::Tuple::create(converted_elements);
+ converted_ivalue = torch::jit::IValue(tuple_ptr);
}
+ } else if (input_ivalue.isList()) {
+ auto input_list = input_ivalue.toList().vec();
+ c10::TypePtr type = input_list[0].type();
+ auto converted_elements = c10::impl::GenericList(type);
+ for (auto item : input_list) {
+ torch::jit::IValue converted_item;
+ to_internal_input_signature(item, converted_item);
+ converted_elements.push_back(converted_item);
+ }
+ converted_ivalue = torch::jit::IValue(converted_elements);
+ } else if (input_ivalue.isCustomClass()) {
+ torchtrt::core::ir::Input cur_input = to_internal_input(*(input_ivalue.toCustomClass<torchtrt::Input>()));
+ converted_ivalue = torch::jit::IValue(std::move(c10::make_intrusive<torch_tensorrt::core::ir::Input>(cur_input)));
+ }
}
torchtrt::core::CompileSpec init_compile_spec(CompileSpec external) {
diff --git a/workspace/cpp/src/torch_tensorrt.cpp b/tmp/changes.txt
index 9381319..22855ae 100644
--- a/workspace/cpp/src/torch_tensorrt.cpp
+++ b/tmp/changes.txt
@@ -53,6 +53,5 @@ void set_device(const int gpu_id) {
torch_tensorrt::core::set_device(gpu_id);
}
-static auto tensorrt_input_container =
- torch::class_<Input>("_torch_tensorrt", "Input").def(torch::init<>());
+static auto tensorrt_input_container = torch::class_<Input>("_torch_tensorrt", "Input").def(torch::init<>());
} // namespace torch_tensorrt
diff --git a/workspace/cpp/include/torch_tensorrt/torch_tensorrt.h b/tmp/changes.txt
index 11dc5d7..6a7035e 100644
--- a/workspace/cpp/include/torch_tensorrt/torch_tensorrt.h
+++ b/tmp/changes.txt
@@ -364,7 +364,7 @@ class TORCHTRT_API TensorFormat {
* signifying a static input shape or a set of three input shapes representing
* the min, optiminal and max input shapes allowed for the engine.
*/
-struct TORCHTRT_API Input : torch::CustomClassHolder{
+struct TORCHTRT_API Input : torch::CustomClassHolder {
/// Minimum acceptable input size into the engine
std::vector<int64_t> min_shape;
/// Optimal input size into the engine (size optimized for given kernels accept any size in min max range)
@@ -520,7 +520,7 @@ struct TORCHTRT_API Input : torch::CustomClassHolder{
* This struct can either hold a complex inputs of shape or a flattened one,
*/
struct TORCHTRT_API GraphInputs {
- torch::jit::IValue input_signature; // nested Input, full input spec
+ torch::jit::IValue input_signature; // nested Input, full input spec
std::vector<Input> inputs; // flatten input spec
};
ERROR: Some files do not conform to style guidelines
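For readers unfamiliar with the formatting the check enforces, the conventions visible throughout the diff above are: a space before opening braces, spaces around the colon in range-based for loops, and long call expressions broken after the opening parenthesis with continuation lines indented by four extra spaces. Below is a minimal, self-contained sketch of those conventions; the function names and values are illustrative only and are not taken from the patch or the Torch-TensorRT codebase.

#include <iostream>
#include <string>
#include <vector>

// Illustrative only: demonstrates the brace spacing, range-for spacing, and
// call wrapping the style check expects. None of these identifiers come from
// the Torch-TensorRT sources.
void print_report(const std::string& file, int overlong_lines, int spacing_issues) {
  std::cout << "File " << file << " has " << overlong_lines << " overlong lines and "
            << spacing_issues << " spacing issues" << std::endl;
}

int main() {
  std::vector<std::string> files = {"core/compiler.cpp", "cpp/src/compile_spec.cpp"};
  for (const auto& file : files) {
    // A call that would exceed the column limit is wrapped after the opening
    // parenthesis, with the arguments indented by four additional spaces.
    print_report(
        file, /*overlong_lines=*/0, /*spacing_issues=*/0);
  }
  return 0;
}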
Signed-off-by: Bo Wang <bowa@nvidia.com>
There are some changes that do not conform to C++ style guidelines:
diff --git a/workspace/py/torch_tensorrt/csrc/torch_tensorrt_py.cpp b/tmp/changes.txt
index 6247789..6b1ffd4 100644
--- a/workspace/py/torch_tensorrt/csrc/torch_tensorrt_py.cpp
+++ b/tmp/changes.txt
@@ -1,8 +1,8 @@
#include "pybind11/pybind11.h"
#include "pybind11/stl.h"
-#include "Python.h"
#include "ATen/core/jit_type.h"
+#include "Python.h"
#include "core/compiler.h"
#include "core/conversion/conversion.h"
#include "tensorrt_classes.h"
@@ -182,7 +182,8 @@ PYBIND11_MODULE(_C, m) {
py::class_<InputSignature>(m, "InputSignature")
.def(pybind11::init([](py::object py_obj) {
InputSignature input_signature;
- input_signature.signature_ivalue = torch::jit::toIValue(std::move(py_obj), c10::PyObjectType::get(), c10::nullopt);
+ input_signature.signature_ivalue =
+ torch::jit::toIValue(std::move(py_obj), c10::PyObjectType::get(), c10::nullopt);
return input_signature;
}))
.def("__str__", &InputSignature::to_str)
diff --git a/workspace/py/torch_tensorrt/csrc/tensorrt_classes.cpp b/tmp/changes.txt
index 9eb58b3..bccfdc0 100644
--- a/workspace/py/torch_tensorrt/csrc/tensorrt_classes.cpp
+++ b/tmp/changes.txt
@@ -191,40 +191,40 @@ std::string TorchFallback::to_str() {
}
void to_internal_input_signature(torch::jit::IValue input_ivalue, torch::jit::IValue& converted_ivalue) {
- if (input_ivalue.isTuple()) {
- auto input_tuple = input_ivalue.toTuple();
- std::vector<torch::jit::IValue> converted_elements;
- for (auto item: input_tuple->elements()) {
- torch::jit::IValue converted_item;
- to_internal_input_signature(item, converted_item);
- converted_elements.push_back(converted_item);
- auto tuple_ptr = c10::ivalue::Tuple::create(converted_elements);
- converted_ivalue = torch::jit::IValue(tuple_ptr);
- }
- } else if(input_ivalue.isList()) {
- auto input_list = input_ivalue.toList().vec();
- c10::TypePtr type = input_list[0].type();
- auto converted_elements = c10::impl::GenericList(type);
- for (auto item: input_list) {
- torch::jit::IValue converted_item;
- to_internal_input_signature(item, converted_item);
- converted_elements.push_back(converted_item);
- }
- converted_ivalue = torch::jit::IValue(converted_elements);
- } else if(input_ivalue.isCustomClass()) {
- core::ir::Input cur_input = (*(input_ivalue.toCustomClass<Input>())).toInternalInput();
- converted_ivalue = torch::jit::IValue(std::move(c10::make_intrusive<core::ir::Input>(cur_input)));
- } else if(input_ivalue.isPyObject()) {
- auto py_object_holder = input_ivalue.toPyObjectHolder();
- auto infer_type = py_object_holder->tryToInferType();
- auto type = infer_type.type();
- torch::jit::IValue ival = py_object_holder->toIValue(type);
+ if (input_ivalue.isTuple()) {
+ auto input_tuple = input_ivalue.toTuple();
+ std::vector<torch::jit::IValue> converted_elements;
+ for (auto item : input_tuple->elements()) {
torch::jit::IValue converted_item;
- to_internal_input_signature(ival, converted_item);
- converted_ivalue = torch::jit::IValue(converted_item);
- } else {
- LOG_ERROR("Unknown input spec type");
+ to_internal_input_signature(item, converted_item);
+ converted_elements.push_back(converted_item);
+ auto tuple_ptr = c10::ivalue::Tuple::create(converted_elements);
+ converted_ivalue = torch::jit::IValue(tuple_ptr);
}
+ } else if (input_ivalue.isList()) {
+ auto input_list = input_ivalue.toList().vec();
+ c10::TypePtr type = input_list[0].type();
+ auto converted_elements = c10::impl::GenericList(type);
+ for (auto item : input_list) {
+ torch::jit::IValue converted_item;
+ to_internal_input_signature(item, converted_item);
+ converted_elements.push_back(converted_item);
+ }
+ converted_ivalue = torch::jit::IValue(converted_elements);
+ } else if (input_ivalue.isCustomClass()) {
+ core::ir::Input cur_input = (*(input_ivalue.toCustomClass<Input>())).toInternalInput();
+ converted_ivalue = torch::jit::IValue(std::move(c10::make_intrusive<core::ir::Input>(cur_input)));
+ } else if (input_ivalue.isPyObject()) {
+ auto py_object_holder = input_ivalue.toPyObjectHolder();
+ auto infer_type = py_object_holder->tryToInferType();
+ auto type = infer_type.type();
+ torch::jit::IValue ival = py_object_holder->toIValue(type);
+ torch::jit::IValue converted_item;
+ to_internal_input_signature(ival, converted_item);
+ converted_ivalue = torch::jit::IValue(converted_item);
+ } else {
+ LOG_ERROR("Unknown input spec type");
+ }
}
core::CompileSpec init_compile_spec(CompileSpec external) {
@@ -281,11 +281,17 @@ core::CompileSpec CompileSpec::toInternalCompileSpec() {
info.convert_info.engine_settings.num_avg_timing_iters = num_avg_timing_iters;
TORCHTRT_CHECK(workspace_size >= 0, "workspace_size must be 0 or greater");
info.convert_info.engine_settings.workspace_size = workspace_size;
- TORCHTRT_CHECK(dla_sram_size >= 4096, "DLA managed SRAM size must be at least 4 KiB and must be a power of 2. This defaults to 1 MiB");
+ TORCHTRT_CHECK(
+ dla_sram_size >= 4096,
+ "DLA managed SRAM size must be at least 4 KiB and must be a power of 2. This defaults to 1 MiB");
info.convert_info.engine_settings.dla_sram_size = dla_sram_size;
- TORCHTRT_CHECK(dla_local_dram_size >= 4096, "DLA Local DRAM size must be at least 4 KiB and must be a power of 2. This defaults to 1 GiB");
+ TORCHTRT_CHECK(
+ dla_local_dram_size >= 4096,
+ "DLA Local DRAM size must be at least 4 KiB and must be a power of 2. This defaults to 1 GiB");
info.convert_info.engine_settings.dla_local_dram_size = dla_local_dram_size;
- TORCHTRT_CHECK(dla_global_dram_size >= 4096, "DLA Global DRAM size must be at least 4 KiB and must be a power of 2. This defaults to 512 MiB");
+ TORCHTRT_CHECK(
+ dla_global_dram_size >= 4096,
+ "DLA Global DRAM size must be at least 4 KiB and must be a power of 2. This defaults to 512 MiB");
info.convert_info.engine_settings.dla_global_dram_size = dla_global_dram_size;
return info;
}
@@ -304,7 +310,7 @@ std::string CompileSpec::stringify() {
}
ss << " \"Enabled Precision\": [";
for (auto p : enabled_precisions) {
- ss << to_str(p) << ", " ;
+ ss << to_str(p) << ", ";
}
ss << "]" << std::endl;
ss << " \"TF32 Disabled\": " << disable_tf32 << std::endl;
diff --git a/workspace/py/torch_tensorrt/csrc/register_tensorrt_classes.cpp b/tmp/changes.txt
index 0eb6fba..274b40d 100644
--- a/workspace/py/torch_tensorrt/csrc/register_tensorrt_classes.cpp
+++ b/tmp/changes.txt
@@ -28,7 +28,8 @@ void RegisterTRTCompileSpec() {
.def(torch::init<>())
.def("__str__", &torch_tensorrt::pyapi::InputSignature::to_str);
- ADD_FIELD_GET_SET_REGISTRATION(TRTInputSignatureTSRegistration, torch_tensorrt::pyapi::InputSignature, signature_ivalue);
+ ADD_FIELD_GET_SET_REGISTRATION(
+ TRTInputSignatureTSRegistration, torch_tensorrt::pyapi::InputSignature, signature_ivalue);
static auto TORCHTRT_UNUSED TRTDeviceTSRegistration =
torch::class_<torch_tensorrt::pyapi::Device>("tensorrt", "_Device")
@@ -73,7 +74,8 @@ void RegisterTRTCompileSpec() {
ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistration, torch_tensorrt::pyapi::CompileSpec, workspace_size);
ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistration, torch_tensorrt::pyapi::CompileSpec, dla_sram_size);
ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistration, torch_tensorrt::pyapi::CompileSpec, dla_local_dram_size);
- ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistration, torch_tensorrt::pyapi::CompileSpec, dla_global_dram_size);
+ ADD_FIELD_GET_SET_REGISTRATION(
+ TRTCompileSpecTSRegistration, torch_tensorrt::pyapi::CompileSpec, dla_global_dram_size);
ADD_FIELD_GET_SET_REGISTRATION(
TRTCompileSpecTSRegistration, torch_tensorrt::pyapi::CompileSpec, truncate_long_and_double);
}
diff --git a/workspace/py/torch_tensorrt/csrc/tensorrt_classes.h b/tmp/changes.txt
index d3b2274..be2fab3 100644
--- a/workspace/py/torch_tensorrt/csrc/tensorrt_classes.h
+++ b/tmp/changes.txt
@@ -58,7 +58,7 @@ struct Input : torch::CustomClassHolder {
};
struct InputSignature : torch::CustomClassHolder {
- torch::jit::IValue signature_ivalue; // nested Input, full input spec
+ torch::jit::IValue signature_ivalue; // nested Input, full input spec
ADD_FIELD_GET_SET(signature_ivalue, torch::jit::IValue);
std::string to_str();
};
diff --git a/workspace/core/compiler.cpp b/tmp/changes.txt
index e44ece5..caee900 100644
--- a/workspace/core/compiler.cpp
+++ b/tmp/changes.txt
@@ -308,70 +308,78 @@ void MapInputsAndDetermineDTypes(
std::shared_ptr<torch::jit::Graph>& g,
ir::StaticParams& static_params,
ir::CollectionTypeMap& first_use_type_map) {
- cfg.convert_info.collection_input_spec_map = std::move(ir::associate_specs_with_collection_inputs(g, cfg.graph_inputs, static_params));
+ cfg.convert_info.collection_input_spec_map =
+ std::move(ir::associate_specs_with_collection_inputs(g, cfg.graph_inputs, static_params));
- auto collection_inputs = ir::get_collection_inputs(g, static_params);
- LOG_DEBUG("In MapInputsAndDetermineDTypes, the g->inputs() size is " << g->inputs().size() << ", CollectionInputSpecMap size is" << collection_inputs.size());
+ auto collection_inputs = ir::get_collection_inputs(g, static_params);
+ LOG_DEBUG(
+ "In MapInputsAndDetermineDTypes, the g->inputs() size is "
+ << g->inputs().size() << ", CollectionInputSpecMap size is" << collection_inputs.size());
- for (auto in : collection_inputs) {
- std::vector<ir::Input>& spec = cfg.convert_info.collection_input_spec_map.find(in)->second;
- std::vector<c10::optional<at::ScalarType>> est_type_opt;
+ for (auto in : collection_inputs) {
+ std::vector<ir::Input>& spec = cfg.convert_info.collection_input_spec_map.find(in)->second;
+ std::vector<c10::optional<at::ScalarType>> est_type_opt;
- auto est_it = first_use_type_map.find(in);
- if (est_it != first_use_type_map.end()) {
- est_type_opt = first_use_type_map.find(in)->second;
- }
- // traverse elements in est_type_out and spec
- for (size_t i = 0; i < est_type_opt.size(); i++) {
- if (est_type_opt[i] && !spec[i].dtype_is_user_defined) {
- // If we can calculate the type from the graph and the type was not defined by the user then use the calculated
- // type
- LOG_INFO(
- "Since input type is not explicitly defined, infering using first tensor calculation\n Inferred input "
- << in->debugName() << " has type " << est_type_opt[i].value());
- spec[i].dtype = util::ScalarTypeToTRTDataType(est_type_opt[i].value());
- } else if (!est_type_opt[i] && !spec[i].dtype_is_user_defined) {
- // If we cannot calculate the type and the user did not define the type, then default to FP32
- LOG_WARNING(
- "Cannot infer input type from calcuations in graph for input "
- << in->debugName() << ". Assuming it is Float32. If not, specify input type explicity");
- spec[i].dtype = nvinfer1::DataType::kFLOAT;
- } else if (spec[i].dtype_is_user_defined && cfg.partition_info.enabled) {
- if (!est_type_opt[i]) {
- LOG_INFO("Cannot infer input tensor dtype in graph, compiler is going to use the user setting");
+ auto est_it = first_use_type_map.find(in);
+ if (est_it != first_use_type_map.end()) {
+ est_type_opt = first_use_type_map.find(in)->second;
+ }
+ // traverse elements in est_type_out and spec
+ for (size_t i = 0; i < est_type_opt.size(); i++) {
+ if (est_type_opt[i] && !spec[i].dtype_is_user_defined) {
+ // If we can calculate the type from the graph and the type was not defined by the user then use the calculated
+ // type
+ LOG_INFO(
+ "Since input type is not explicitly defined, infering using first tensor calculation\n Inferred input "
+ << in->debugName() << " has type " << est_type_opt[i].value());
+ spec[i].dtype = util::ScalarTypeToTRTDataType(est_type_opt[i].value());
+ } else if (!est_type_opt[i] && !spec[i].dtype_is_user_defined) {
+ // If we cannot calculate the type and the user did not define the type, then default to FP32
+ LOG_WARNING(
+ "Cannot infer input type from calcuations in graph for input "
+ << in->debugName() << ". Assuming it is Float32. If not, specify input type explicity");
+ spec[i].dtype = nvinfer1::DataType::kFLOAT;
+ } else if (spec[i].dtype_is_user_defined && cfg.partition_info.enabled) {
+ if (!est_type_opt[i]) {
+ LOG_INFO("Cannot infer input tensor dtype in graph, compiler is going to use the user setting");
+ std::stringstream ss;
+ ss << "For input " << in->debugName() << ", found user specified input dtype as ";
+ ss << cfg.convert_info.collection_input_spec_map.find(in)->second[i].dtype;
+ ss << ". The compiler is going to use the user setting "
+ << cfg.convert_info.collection_input_spec_map.find(in)->second[i].dtype;
+ auto warn_str = ss.str();
+ LOG_WARNING(warn_str);
+ // Overwrite type map with user settings
+ first_use_type_map[in][i] = {
+ util::TRTDataTypeToScalarType(cfg.convert_info.collection_input_spec_map.find(in)->second[i].dtype)};
+
+ } else {
+ if (util::TRTDataTypeToScalarType(cfg.convert_info.collection_input_spec_map.find(in)->second[i].dtype) !=
+ est_type_opt[i].value()) {
std::stringstream ss;
ss << "For input " << in->debugName() << ", found user specified input dtype as ";
ss << cfg.convert_info.collection_input_spec_map.find(in)->second[i].dtype;
- ss << ". The compiler is going to use the user setting " << cfg.convert_info.collection_input_spec_map.find(in)->second[i].dtype;
+ ss << ", however when inspecting the graph, the input type expected was inferred to be ";
+ ss << est_type_opt[i].value() << std::endl;
+ ss << "The compiler is going to use the user setting "
+ << cfg.convert_info.collection_input_spec_map.find(in)->second[i].dtype;
+ ss << "\nThis conflict may cause an error at runtime due to partial compilation being enabled and therefore\n";
+ ss << "compatibility with PyTorch's data type convention is required.\n";
+ ss << "If you do indeed see errors at runtime either:\n";
+ ss << "- Remove the dtype spec for " << in->debugName() << std::endl;
+ ss << "- Disable partial compilation by setting require_full_compilation to True";
auto warn_str = ss.str();
LOG_WARNING(warn_str);
// Overwrite type map with user settings
- first_use_type_map[in][i] = {util::TRTDataTypeToScalarType(cfg.convert_info.collection_input_spec_map.find(in)->second[i].dtype)};
-
- } else {
- if (util::TRTDataTypeToScalarType(cfg.convert_info.collection_input_spec_map.find(in)->second[i].dtype) != est_type_opt[i].value()) {
- std::stringstream ss;
- ss << "For input " << in->debugName() << ", found user specified input dtype as ";
- ss << cfg.convert_info.collection_input_spec_map.find(in)->second[i].dtype;
- ss << ", however when inspecting the graph, the input type expected was inferred to be ";
- ss << est_type_opt[i].value() << std::endl;
- ss << "The compiler is going to use the user setting " << cfg.convert_info.collection_input_spec_map.find(in)->second[i].dtype;
- ss << "\nThis conflict may cause an error at runtime due to partial compilation being enabled and therefore\n";
- ss << "compatibility with PyTorch's data type convention is required.\n";
- ss << "If you do indeed see errors at runtime either:\n";
- ss << "- Remove the dtype spec for " << in->debugName() << std::endl;
- ss << "- Disable partial compilation by setting require_full_compilation to True";
- auto warn_str = ss.str();
- LOG_WARNING(warn_str);
- // Overwrite type map with user settings
- first_use_type_map[in][i] = {util::TRTDataTypeToScalarType(cfg.convert_info.collection_input_spec_map.find(in)->second[i].dtype)};
- }
+ first_use_type_map[in][i] = {
+ util::TRTDataTypeToScalarType(cfg.convert_info.collection_input_spec_map.find(in)->second[i].dtype)};
}
- } else {
- // The user defined the type so no changes are necessary
}
+ } else {
+ // The user defined the type so no changes are necessary
}
}
+ }
// }
}
@@ -425,12 +433,13 @@ torch::jit::Module CompileGraph(const torch::jit::Module& mod, CompileSpec cfg)
if (cfg.partition_info.enabled &&
(!(cfg.lower_info.forced_fallback_modules.size() == 0 &&
- cfg.partition_info.forced_fallback_operators.size() == 0 && isBlockConvertible)
- || outputIsCollection)) {
-
+ cfg.partition_info.forced_fallback_operators.size() == 0 && isBlockConvertible) ||
+ outputIsCollection)) {
std::unordered_map<torch::jit::Node*, int> fallback_nodes;
- auto collection_input_ivalues_map = partitioning::generateRandomInputs(cfg.convert_info.collection_input_spec_map, first_use_types);
- auto graph_and_mapping = ConstructFallbackGraph(new_mod, g->block(), collection_input_ivalues_map, cfg, static_params, fallback_nodes);
+ auto collection_input_ivalues_map =
+ partitioning::generateRandomInputs(cfg.convert_info.collection_input_spec_map, first_use_types);
+ auto graph_and_mapping = ConstructFallbackGraph(
+ new_mod, g->block(), collection_input_ivalues_map, cfg, static_params, fallback_nodes);
new_g = graph_and_mapping.first;
// renaming the input name of graph after fallback to ensure pytorch deserialize it correctly
for (size_t i = 0; i < new_g->inputs().size(); ++i) {
diff --git a/workspace/core/conversion/conversion.cpp b/tmp/changes.txt
index 914f1dd..5f4b20e 100644
--- a/workspace/core/conversion/conversion.cpp
+++ b/tmp/changes.txt
@@ -135,12 +135,10 @@ void AddLayer(ConversionCtx* ctx, const torch::jit::Node* n) {
<< "please report this error to https://www.github.com/NVIDIA/Torch-TensorRT/issues");
}
-void AddInputs(
- ConversionCtx* ctx,
- c10::ArrayRef<const torch::jit::Value*> inputs,
- ConversionInfo& conversion_info) {
+void AddInputs(ConversionCtx* ctx, c10::ArrayRef<const torch::jit::Value*> inputs, ConversionInfo& conversion_info) {
std::unordered_map<const torch::jit::Value*, ir::Input>& input_specs = conversion_info.inputs;
- std::unordered_map<const torch::jit::Value*, std::vector<ir::Input>> collection_input_spec = conversion_info.collection_input_spec_map;
+ std::unordered_map<const torch::jit::Value*, std::vector<ir::Input>> collection_input_spec =
+ conversion_info.collection_input_spec_map;
std::vector<const torch::jit::Value*> input_tensors;
for (auto in : inputs) {
@@ -173,7 +171,7 @@ void AddInputs(
"Cannot find an input spec associated with input: " << in->debugName());
ir::Input spec;
if (input_specs.find(in) != input_specs.end()) {
- spec = input_specs.find(in)->second;
+ spec = input_specs.find(in)->second;
} else {
spec = collection_input_spec.find(in)->second[0]; // assume input is tensor
}
@@ -559,8 +557,9 @@ std::set<std::string> ConvertableOpsInBlock(const torch::jit::Block* b) {
}
bool OutputIsCollection(const torch::jit::Block* b) {
- for (auto out: b->outputs()) {
- if(out->type()->kind() == torch::jit::TypeKind::TupleType || out->type()->kind() == torch::jit::TypeKind::ListType) {
+ for (auto out : b->outputs()) {
+ if (out->type()->kind() == torch::jit::TypeKind::TupleType ||
+ out->type()->kind() == torch::jit::TypeKind::ListType) {
return true;
}
}
diff --git a/workspace/core/conversion/conversionctx/ConversionCtx.cpp b/tmp/changes.txt
index a24a159..71159eb 100644
--- a/workspace/core/conversion/conversionctx/ConversionCtx.cpp
+++ b/tmp/changes.txt
@@ -107,7 +107,7 @@ ConversionCtx::ConversionCtx(BuilderSettings build_settings)
}
cfg->setAvgTimingIterations(settings.num_avg_timing_iters);
- if (settings.workspace_size != 0){
+ if (settings.workspace_size != 0) {
cfg->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kWORKSPACE, settings.workspace_size);
}
@@ -124,13 +124,13 @@ ConversionCtx::ConversionCtx(BuilderSettings build_settings)
settings.enabled_precisions.find(nvinfer1::DataType::kFLOAT) == settings.enabled_precisions.end(),
"DLA supports only fp16 or int8 precision");
cfg->setDLACore(settings.device.dla_core);
- if (settings.dla_sram_size != 1048576){
+ if (settings.dla_sram_size != 1048576) {
cfg->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kDLA_MANAGED_SRAM, settings.dla_sram_size);
}
- if (settings.dla_local_dram_size != 1073741824){
+ if (settings.dla_local_dram_size != 1073741824) {
cfg->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kDLA_LOCAL_DRAM, settings.dla_local_dram_size);
}
- if (settings.dla_global_dram_size != 536870912){
+ if (settings.dla_global_dram_size != 536870912) {
cfg->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kDLA_GLOBAL_DRAM, settings.dla_global_dram_size);
}
}
diff --git a/workspace/core/conversion/converters/converter_util.cpp b/tmp/changes.txt
index a6a2bbd..7452615 100644
--- a/workspace/core/conversion/converters/converter_util.cpp
+++ b/tmp/changes.txt
@@ -207,13 +207,13 @@ nvinfer1::ITensor* clamp(
nvinfer1::ITensor* lower_bound,
nvinfer1::ITensor* upper_bound,
std::string const& name) {
-
auto max_layer = add_elementwise(ctx, nvinfer1::ElementWiseOperation::kMAX, x, lower_bound, "max layer for " + name);
TORCHTRT_CHECK(max_layer, "Unable to create max layer for clamp");
LOG_DEBUG(ctx->logger, "Create " << max_layer->getName() << " for clamp");
auto max_itensor = max_layer->getOutput(0);
- auto min_layer = add_elementwise(ctx, nvinfer1::ElementWiseOperation::kMIN, max_itensor, upper_bound, "min layer for " + name);
+ auto min_layer =
+ add_elementwise(ctx, nvinfer1::ElementWiseOperation::kMIN, max_itensor, upper_bound, "min layer for " + name);
TORCHTRT_CHECK(min_layer, "Unable to create min layer for clamp");
LOG_DEBUG(ctx->logger, "Create " << min_layer->getName() << " for clamp");
auto min_itensor = min_layer->getOutput(0);
@@ -227,13 +227,13 @@ nvinfer1::ITensor* clamp_to_input_dim(
nvinfer1::ITensor* input_dim,
int nbdims,
std::string const& name) {
-
auto zero = torch::zeros({nbdims}).to(torch::kI32);
auto zero_itensor = tensor_to_const(ctx, zero);
auto one = torch::ones({nbdims}).to(torch::kI32);
auto one_itensor = tensor_to_const(ctx, one);
- auto upper_bound_layer = add_elementwise(ctx, nvinfer1::ElementWiseOperation::kSUB, input_dim, one_itensor, "sub layer for " + name);
+ auto upper_bound_layer =
+ add_elementwise(ctx, nvinfer1::ElementWiseOperation::kSUB, input_dim, one_itensor, "sub layer for " + name);
TORCHTRT_CHECK(upper_bound_layer, "Unable to create sub layer for clamp to inputDim");
LOG_DEBUG(ctx->logger, "Create " << upper_bound_layer->getName() << " for clamp to inputDim");
auto upper_bound = upper_bound_layer->getOutput(0);
@@ -243,7 +243,8 @@ nvinfer1::ITensor* clamp_to_input_dim(
LOG_DEBUG(ctx->logger, "Create " << max_layer->getName() << " for clamp to inputDim");
auto max_itensor = max_layer->getOutput(0);
- auto min_layer = add_elementwise(ctx, nvinfer1::ElementWiseOperation::kMIN, max_itensor, upper_bound, "min layer for " + name);
+ auto min_layer =
+ add_elementwise(ctx, nvinfer1::ElementWiseOperation::kMIN, max_itensor, upper_bound, "min layer for " + name);
TORCHTRT_CHECK(min_layer, "Unable to create min_layer for clamp to inputDim");
LOG_DEBUG(ctx->logger, "Create " << min_layer->getName() << " for clamp to inputDim");
auto min_itensor = min_layer->getOutput(0);
@@ -257,7 +258,6 @@ nvinfer1::ITensor* normalize_indices(
nvinfer1::ITensor* indices,
int nbdims,
std::string const& name) {
-
auto zero = torch::zeros({nbdims}).to(torch::kI32);
auto neg = -torch::ones({nbdims}).to(torch::kI32);
auto zero_itensor = tensor_to_const(ctx, zero);
@@ -307,17 +307,20 @@ nvinfer1::ITensor* get_slice_size(
at::Tensor one_tensor = torch::ones({nbdims}).to(torch::kI32);
auto one_itensor = tensor_to_const(ctx, one_tensor);
- auto sub_layer = add_elementwise(ctx, nvinfer1::ElementWiseOperation::kSUB, end, start, "get_slice_size sub layer for " + name);
+ auto sub_layer =
+ add_elementwise(ctx, nvinfer1::ElementWiseOperation::kSUB, end, start, "get_slice_size sub layer for " + name);
TORCHTRT_CHECK(sub_layer, "Unable to create sub layer in calculate_output_size");
LOG_DEBUG(ctx->logger, "Create " << sub_layer->getName() << " for calculate_output_size");
auto sub_itensor = sub_layer->getOutput(0);
- auto div_layer = add_elementwise(ctx, nvinfer1::ElementWiseOperation::kDIV, sub_itensor, stride, "get_slice_size div layer for " + name);
+ auto div_layer = add_elementwise(
+ ctx, nvinfer1::ElementWiseOperation::kDIV, sub_itensor, stride, "get_slice_size div layer for " + name);
TORCHTRT_CHECK(div_layer, "Unable to create div layer in calculate_output_size");
LOG_DEBUG(ctx->logger, "Create " << div_layer->getName() << " for calculate_output_size");
auto div_itensor = div_layer->getOutput(0);
- auto add_layer = add_elementwise(ctx, nvinfer1::ElementWiseOperation::kSUM, div_itensor, one_itensor, "get_slice_size sum layer for " + name);
+ auto add_layer = add_elementwise(
+ ctx, nvinfer1::ElementWiseOperation::kSUM, div_itensor, one_itensor, "get_slice_size sum layer for " + name);
TORCHTRT_CHECK(add_layer, "Unable to create add layer in calculate_output_size");
LOG_DEBUG(ctx->logger, "Create " << add_layer->getName() << " for calculate_output_size");
auto size_itensor = add_layer->getOutput(0);
diff --git a/workspace/core/conversion/converters/impl/select.cpp b/tmp/changes.txt
index 3599ab9..d33f09a 100644
--- a/workspace/core/conversion/converters/impl/select.cpp
+++ b/tmp/changes.txt
@@ -103,121 +103,118 @@ nvinfer1::ITensor* roll(
auto select_registrations TORCHTRT_UNUSED =
RegisterNodeConversionPatterns()
- .pattern(
- {"aten::select.int(Tensor(a) self, int dim, int index) -> (Tensor(a))",
- [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
- auto in = args[0].ITensorOrFreeze(ctx);
- auto maxDim = static_cast<int64_t>(in->getDimensions().nbDims);
- auto dim = args[1].unwrapToInt();
- // Handle negative axis by refering to nbDims of input Tensor
- dim = dim < 0 ? dim + maxDim : dim;
- auto ind = (int32_t)args[2].unwrapToInt();
- // Along the specified dimension, handle negative index by subtracting along length of dimension.
- ind = ind < 0 ? ind + in->getDimensions().d[dim] : ind;
- LOG_DEBUG("Gather input dimensions: " << in->getDimensions());
- LOG_DEBUG("Dimension to select: " << dim);
- LOG_DEBUG("Index: " << ind);
-
- // index to access needs to be an at::Tensor
- at::Tensor indices = torch::tensor({ind}).to(torch::kI32);
- auto const_out = tensor_to_const(ctx, indices);
-
- // IGatherLayer takes in input tensor, the indices, and the axis
- // of input tensor to take indices from
- auto gather_layer = ctx->net->addGather(*in, *const_out, dim);
- TORCHTRT_CHECK(gather_layer, "Unable to create gather layer from node: " << *n);
- auto out = gather_layer->getOutput(0);
+ .pattern({"aten::select.int(Tensor(a) self, int dim, int index) -> (Tensor(a))",
+ [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
+ auto in = args[0].ITensorOrFreeze(ctx);
+ auto maxDim = static_cast<int64_t>(in->getDimensions().nbDims);
+ auto dim = args[1].unwrapToInt();
+ // Handle negative axis by refering to nbDims of input Tensor
+ dim = dim < 0 ? dim + maxDim : dim;
+ auto ind = (int32_t)args[2].unwrapToInt();
+ // Along the specified dimension, handle negative index by subtracting along length of dimension.
+ ind = ind < 0 ? ind + in->getDimensions().d[dim] : ind;
+ LOG_DEBUG("Gather input dimensions: " << in->getDimensions());
+ LOG_DEBUG("Dimension to select: " << dim);
+ LOG_DEBUG("Index: " << ind);
+
+ // index to access needs to be an at::Tensor
+ at::Tensor indices = torch::tensor({ind}).to(torch::kI32);
+ auto const_out = tensor_to_const(ctx, indices);
+
+ // IGatherLayer takes in input tensor, the indices, and the axis
+ // of input tensor to take indices from
+ auto gather_layer = ctx->net->addGather(*in, *const_out, dim);
+ TORCHTRT_CHECK(gather_layer, "Unable to create gather layer from node: " << *n);
+ auto out = gather_layer->getOutput(0);
+
+ LOG_DEBUG("Gather tensor shape: " << out->getDimensions());
+
+ if (out->getDimensions().nbDims != 1) {
+ // IShuffleLayer removes redundant dimensions
+ auto shuffle_layer = ctx->net->addShuffle(*out);
+ TORCHTRT_CHECK(shuffle_layer, "Unable to create shuffle layer from node: " << *n);
+ shuffle_layer->setReshapeDimensions(util::squeezeDims(out->getDimensions(), dim));
+ shuffle_layer->setName(util::node_info(n).c_str());
+ out = shuffle_layer->getOutput(0);
+ }
+
+ out = ctx->AssociateValueAndTensor(n->outputs()[0], out);
+
+ LOG_DEBUG("Output tensor shape: " << out->getDimensions());
- LOG_DEBUG("Gather tensor shape: " << out->getDimensions());
+ return true;
+ }})
+ .pattern({"aten::narrow(Tensor(a) self, int dim, int start, int length) -> Tensor(a)",
+ [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
+ auto in = args[0].ITensor();
+ auto axis = args[1].unwrapToInt();
+ auto start = (int32_t)args[2].unwrapToInt();
+ auto length = (int32_t)args[3].unwrapToInt();
- if (out->getDimensions().nbDims != 1) {
- // IShuffleLayer removes redundant dimensions
- auto shuffle_layer = ctx->net->addShuffle(*out);
- TORCHTRT_CHECK(shuffle_layer, "Unable to create shuffle layer from node: " << *n);
- shuffle_layer->setReshapeDimensions(util::squeezeDims(out->getDimensions(), dim));
- shuffle_layer->setName(util::node_info(n).c_str());
- out = shuffle_layer->getOutput(0);
- }
+ // index to access needs to be an at::Tensor
+ at::Tensor indices = torch::arange(start, start + length, 1).to(torch::kI32);
+ auto weights = Weights(ctx, indices);
- out = ctx->AssociateValueAndTensor(n->outputs()[0], out);
+ // IConstantLayer to convert indices from Weights to ITensor
+ auto const_layer = ctx->net->addConstant(weights.shape, weights.data);
+ TORCHTRT_CHECK(const_layer, "Unable to create constant layer from node: " << *n);
+ auto const_out = const_layer->getOutput(0);
- LOG_DEBUG("Output tensor shape: " << out->getDimensions());
+ // IGatherLayer takes in input tensor, the indices, and the axis
+ // of input tensor to take indices from
+ auto gather_layer = ctx->net->addGather(*in, *const_out, axis);
+ TORCHTRT_CHECK(gather_layer, "Unable to create gather layer from node: " << *n);
+ auto gather_out = gather_layer->getOutput(0);
- return true;
- }})
- .pattern(
- {"aten::narrow(Tensor(a) self, int dim, int start, int length) -> Tensor(a)",
- [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
- auto in = args[0].ITensor();
- auto axis = args[1].unwrapToInt();
- auto start = (int32_t)args[2].unwrapToInt();
- auto length = (int32_t)args[3].unwrapToInt();
-
- // index to access needs to be an at::Tensor
- at::Tensor indices = torch::arange(start, start + length, 1).to(torch::kI32);
- auto weights = Weights(ctx, indices);
-
- // IConstantLayer to convert indices from Weights to ITensor
- auto const_layer = ctx->net->addConstant(weights.shape, weights.data);
- TORCHTRT_CHECK(const_layer, "Unable to create constant layer from node: " << *n);
- auto const_out = const_layer->getOutput(0);
-
- // IGatherLayer takes in input tensor, the indices, and the axis
- // of input tensor to take indices from
- auto gather_layer = ctx->net->addGather(*in, *const_out, axis);
- TORCHTRT_CHECK(gather_layer, "Unable to create gather layer from node: " << *n);
- auto gather_out = gather_layer->getOutput(0);
-
- // IShuffleLayer removes redundant dimensions
- auto shuffle_layer = ctx->net->addShuffle(*gather_out);
- TORCHTRT_CHECK(shuffle_layer, "Unable to create shuffle layer from node: " << *n);
- shuffle_layer->setReshapeDimensions(util::unpadDims(gather_out->getDimensions()));
- shuffle_layer->setName(util::node_info(n).c_str());
- auto shuffle_out = shuffle_layer->getOutput(0);
+ // IShuffleLayer removes redundant dimensions
+ auto shuffle_layer = ctx->net->addShuffle(*gather_out);
+ TORCHTRT_CHECK(shuffle_layer, "Unable to create shuffle layer from node: " << *n);
+ shuffle_layer->setReshapeDimensions(util::unpadDims(gather_out->getDimensions()));
+ shuffle_layer->setName(util::node_info(n).c_str());
+ auto shuffle_out = shuffle_layer->getOutput(0);
- auto out = ctx->AssociateValueAndTensor(n->outputs()[0], shuffle_out);
+ auto out = ctx->AssociateValueAndTensor(n->outputs()[0], shuffle_out);
- LOG_DEBUG("Output tensor shape: " << out->getDimensions());
+ LOG_DEBUG("Output tensor shape: " << out->getDimensions());
- return true;
- }})
- .pattern(
- {"aten::narrow.Tensor(Tensor(a) self, int dim, Tensor start, int length) -> Tensor(a)",
- [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
- auto in = args[0].ITensor();
- auto axis = args[1].unwrapToInt();
- torch::Tensor start = args[2].IValue()->toTensor().to(torch::kI32);
- int32_t startIdx = start.item().to<int32_t>();
- auto length = (int32_t)args[3].unwrapToInt();
-
- // index to access needs to be an at::Tensor
- at::Tensor indices = torch::arange(startIdx, startIdx + length, 1).to(torch::kI32);
- auto weights = Weights(ctx, indices);
-
- // IConstantLayer to convert indices from Weights to ITensor
- auto const_layer = ctx->net->addConstant(weights.shape, weights.data);
- TORCHTRT_CHECK(const_layer, "Unable to create constant layer from node: " << *n);
- auto const_out = const_layer->getOutput(0);
-
- // IGatherLayer takes in input tensor, the indices, and the axis
- // of input tensor to take indices from
- auto gather_layer = ctx->net->addGather(*in, *const_out, axis);
- TORCHTRT_CHECK(gather_layer, "Unable to create gather layer from node: " << *n);
- auto gather_out = gather_layer->getOutput(0);
-
- // IShuffleLayer removes redundant dimensions
- auto shuffle_layer = ctx->net->addShuffle(*gather_out);
- TORCHTRT_CHECK(shuffle_layer, "Unable to create shuffle layer from node: " << *n);
- shuffle_layer->setReshapeDimensions(util::unpadDims(gather_out->getDimensions()));
- shuffle_layer->setName(util::node_info(n).c_str());
- auto shuffle_out = shuffle_layer->getOutput(0);
-
- auto out = ctx->AssociateValueAndTensor(n->outputs()[0], shuffle_out);
-
- LOG_DEBUG("Output tensor shape: " << out->getDimensions());
+ return true;
+ }})
+ .pattern({"aten::narrow.Tensor(Tensor(a) self, int dim, Tensor start, int length) -> Tensor(a)",
+ [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
+ auto in = args[0].ITensor();
+ auto axis = args[1].unwrapToInt();
+ torch::Tensor start = args[2].IValue()->toTensor().to(torch::kI32);
+ int32_t startIdx = start.item().to<int32_t>();
+ auto length = (int32_t)args[3].unwrapToInt();
+
+ // index to access needs to be an at::Tensor
+ at::Tensor indices = torch::arange(startIdx, startIdx + length, 1).to(torch::kI32);
+ auto weights = Weights(ctx, indices);
+
+ // IConstantLayer to convert indices from Weights to ITensor
+ auto const_layer = ctx->net->addConstant(weights.shape, weights.data);
+ TORCHTRT_CHECK(const_layer, "Unable to create constant layer from node: " << *n);
+ auto const_out = const_layer->getOutput(0);
+
+ // IGatherLayer takes in input tensor, the indices, and the axis
+ // of input tensor to take indices from
+ auto gather_layer = ctx->net->addGather(*in, *const_out, axis);
+ TORCHTRT_CHECK(gather_layer, "Unable to create gather layer from node: " << *n);
+ auto gather_out = gather_layer->getOutput(0);
+
+ // IShuffleLayer removes redundant dimensions
+ auto shuffle_layer = ctx->net->addShuffle(*gather_out);
+ TORCHTRT_CHECK(shuffle_layer, "Unable to create shuffle layer from node: " << *n);
+ shuffle_layer->setReshapeDimensions(util::unpadDims(gather_out->getDimensions()));
+ shuffle_layer->setName(util::node_info(n).c_str());
+ auto shuffle_out = shuffle_layer->getOutput(0);
+
+ auto out = ctx->AssociateValueAndTensor(n->outputs()[0], shuffle_out);
+
+ LOG_DEBUG("Output tensor shape: " << out->getDimensions());
- return true;
- }})
+ return true;
+ }})
.pattern(
{"aten::embedding(Tensor weight, Tensor indices, int padding_idx=-1, bool scale_grad_by_freq=False, bool sparse=False) -> (Tensor)",
[](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
@@ -239,30 +236,29 @@ auto select_registrations TORCHTRT_UNUSED =
return true;
}})
- .pattern(
- {"aten::roll(Tensor self, int[1] shifts, int[1] dims=[]) -> (Tensor)",
- [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
- auto in = args[0].ITensor();
- auto shifts = args[1].unwrapToIntList().vec();
- auto dims = args[2].unwrapToIntList().vec();
-
- TORCHTRT_CHECK(dims.size() == shifts.size(), "dims.size() should be equal to shifts.size()");
- if (ctx->input_is_dynamic) {
- TORCHTRT_THROW_ERROR("aten::roll is currently not support in dynamic input shape compilation");
- } else {
- auto in_shape = util::toVec(in->getDimensions());
- for (size_t i = 0; i < dims.size(); i++) {
- auto dim = dims[i] < 0 ? (in_shape.size() + dims[i]) : dims[i];
- TORCHTRT_CHECK(dim < in_shape.size(), "Dimension out of range");
- in = roll(ctx, in, shifts[i], dim, in_shape);
- }
- auto out = ctx->AssociateValueAndTensor(n->outputs()[0], in);
-
- LOG_DEBUG("Output tensor shape: " << out->getDimensions());
-
- return true;
- }
- }})
+ .pattern({"aten::roll(Tensor self, int[1] shifts, int[1] dims=[]) -> (Tensor)",
+ [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
+ auto in = args[0].ITensor();
+ auto shifts = args[1].unwrapToIntList().vec();
+ auto dims = args[2].unwrapToIntList().vec();
+
+ TORCHTRT_CHECK(dims.size() == shifts.size(), "dims.size() should be equal to shifts.size()");
+ if (ctx->input_is_dynamic) {
+ TORCHTRT_THROW_ERROR("aten::roll is currently not support in dynamic input shape compilation");
+ } else {
+ auto in_shape = util::toVec(in->getDimensions());
+ for (size_t i = 0; i < dims.size(); i++) {
+ auto dim = dims[i] < 0 ? (in_shape.size() + dims[i]) : dims[i];
+ TORCHTRT_CHECK(dim < in_shape.size(), "Dimension out of range");
+ in = roll(ctx, in, shifts[i], dim, in_shape);
+ }
+ auto out = ctx->AssociateValueAndTensor(n->outputs()[0], in);
+
+ LOG_DEBUG("Output tensor shape: " << out->getDimensions());
+
+ return true;
+ }
+ }})
.pattern(
{"aten::index.Tensor(Tensor self, Tensor?[] indices) -> (Tensor)",
[](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
@@ -319,7 +315,8 @@ auto select_registrations TORCHTRT_UNUSED =
int startIdx = 0;
auto startIdxIVal = args[2].IValue();
if (!startIdxIVal->isNone()) {
- startIdx = startIdxIVal->toInt() > std::numeric_limits<int32_t>::max() ? maxDim : startIdxIVal->toInt();
+ startIdx =
+ startIdxIVal->toInt() > std::numeric_limits<int32_t>::max() ? maxDim : startIdxIVal->toInt();
startIdx = maxDim == -1 ? startIdx : std::min(startIdx, maxDim);
}
// Handle case when given tensor index is negative
@@ -331,7 +328,8 @@ auto select_registrations TORCHTRT_UNUSED =
int endIdx = maxDim; // -1 for dynamic shape
auto endIdxIVal = args[3].IValue();
if (!endIdxIVal->isNone()) {
- int truncate_value = endIdxIVal->toInt() > std::numeric_limits<int32_t>::max() ? maxDim : endIdxIVal->toInt();
+ int truncate_value =
+ endIdxIVal->toInt() > std::numeric_limits<int32_t>::max() ? maxDim : endIdxIVal->toInt();
endIdx = maxDim == -1 ? truncate_value : std::min(truncate_value, maxDim);
}
if (maxDim > 0) {
@@ -385,7 +383,8 @@ auto select_registrations TORCHTRT_UNUSED =
// update start and end
nvinfer1::ITensor* out_start;
nvinfer1::ITensor* out_end;
- auto start_end = normalize_start_and_end(ctx, ishape_tensor, start_itensor, end_itensor, nbdims, node_name);
+ auto start_end =
+ normalize_start_and_end(ctx, ishape_tensor, start_itensor, end_itensor, nbdims, node_name);
out_start = start_end[0];
out_end = start_end[1];
@@ -397,7 +396,7 @@ auto select_registrations TORCHTRT_UNUSED =
slice_layer->setInput(2, *size_itensor); // size, must be set if input is dynamic
}
auto slice_out = slice_layer->getOutput(0);
-
+
auto out = ctx->AssociateValueAndTensor(n->outputs()[0], slice_out);
LOG_DEBUG("Slice layer output shape: " << out->getDimensions());
diff --git a/workspace/core/partitioning/shape_analysis.cpp b/tmp/changes.txt
index 1221318..8767048 100644
--- a/workspace/core/partitioning/shape_analysis.cpp
+++ b/tmp/changes.txt
@@ -9,31 +9,28 @@ namespace core {
namespace partitioning {
at::Tensor generateSingleInput(ir::Input& input, c10::optional<at::ScalarType>& type_opt) {
- auto cur_shape = input.input_shape;
- std::vector<int64_t> shape;
- shape.insert(shape.begin(), std::begin(cur_shape.d), std::begin(cur_shape.d) + cur_shape.nbDims);
- // auto type_opt = types[input.first][i];
- auto type = at::kFloat;
- if (type_opt) {
- type = type_opt.value();
- } else {
- LOG_WARNING("Input type for doing shape analysis could not be determined, defaulting to F32");
- }
- auto in = at::randint(5, shape, {at::kCUDA}).to(type);
- // ivalue_map[input.first] = in.clone();
- return in;
+ auto cur_shape = input.input_shape;
+ std::vector<int64_t> shape;
+ shape.insert(shape.begin(), std::begin(cur_shape.d), std::begin(cur_shape.d) + cur_shape.nbDims);
+ // auto type_opt = types[input.first][i];
+ auto type = at::kFloat;
+ if (type_opt) {
+ type = type_opt.value();
+ } else {
+ LOG_WARNING("Input type for doing shape analysis could not be determined, defaulting to F32");
+ }
+ auto in = at::randint(5, shape, {at::kCUDA}).to(type);
+ // ivalue_map[input.first] = in.clone();
+ return in;
}
std::unordered_map<const torch::jit::Value*, torch::jit::IValue> generateRandomInputs(
std::unordered_map<const torch::jit::Value*, std::vector<ir::Input>>& inputs,
std::unordered_map<const torch::jit::Value*, std::vector<c10::optional<at::ScalarType>>>& types) {
-
// generate random inputs for running pytorch segments
std::unordered_map<const torch::jit::Value*, torch::jit::IValue> ivalue_map;
-
for (auto& input : inputs) {
-
if (input.first->type()->kind() == torch::jit::TypeKind::ListType) {
// create list
std::vector<torch::jit::IValue> list;
@@ -56,7 +53,6 @@ std::unordered_map<const torch::jit::Value*, torch::jit::IValue> generateRandomI
} else {
auto in = generateSingleInput(input.second[0], types[input.first][0]);
ivalue_map[input.first] = in.clone();
-
}
}
return ivalue_map;
@@ -109,7 +105,8 @@ void getSegmentsOutputByRunning(
jit_inputs_ivalues.push_back(ivalues_maps[input].toBool());
} else if (input->type()->kind() == torch::jit::TypeKind::ListType) {
// create list
- jit_inputs_ivalues.push_back(ivalues_maps[input].toList());;
+ jit_inputs_ivalues.push_back(ivalues_maps[input].toList());
+ ;
} else if (input->type()->kind() == torch::jit::TypeKind::TupleType) {
// create tuple
jit_inputs_ivalues.push_back(ivalues_maps[input].toTuple());
diff --git a/workspace/core/ir/GraphInputs.cpp b/tmp/changes.txt
index 007a727..a1b1196 100644
--- a/workspace/core/ir/GraphInputs.cpp
+++ b/tmp/changes.txt
@@ -5,70 +5,74 @@ namespace torch_tensorrt {
namespace core {
namespace ir {
-void flatten_dfs(std::vector<torch_tensorrt::core::ir::Input>& flattened_inputs, std::vector<std::vector<torch_tensorrt::core::ir::Input>>& collection_inputs,
- torch::jit::IValue input_ivalue, int level, int index) {
- if (input_ivalue.isTuple()) {
- auto input_tuple = input_ivalue.toTuple();
- int idx = 0;
- if (level == 0) {
- collection_inputs.resize(input_tuple->elements().size());
- }
- for (auto item: input_tuple->elements()) {
- torch::jit::IValue converted_item;
- int cur_idx = level < 1 ? idx: index;
- flatten_dfs(flattened_inputs, collection_inputs, item, level+1, cur_idx);
- idx++;
- }
- } else if(input_ivalue.isList()) {
- auto input_list = input_ivalue.toList().vec();
- if (level == 0) {
- collection_inputs.resize(input_list.size());
- }
- c10::TypePtr type = input_list[0].type();
- auto converted_elements = c10::impl::GenericList(type);
- int idx = 0;
- for (auto item: input_list) {
- int cur_idx = level < 1 ? idx: index;
- flatten_dfs(flattened_inputs, collection_inputs, item, level+1, cur_idx);
- idx++;
- }
- } else if(input_ivalue.isCustomClass()) {
- torch_tensorrt::core::ir::Input cur_input = *(input_ivalue.toCustomClass<torch_tensorrt::core::ir::Input>());
- flattened_inputs.push_back(cur_input);
- if (level == 0) { // a single value like A
- collection_inputs.resize(1);
- collection_inputs[0].push_back(cur_input);
- } else if (level == 1) { // like A in [A, A] or [(B, B), A]
- collection_inputs[index].push_back(cur_input);
- } else if (level == 2) { // like A in [(A, A), C]
- collection_inputs[index].push_back(cur_input);
- } else {// only support 2 level
- LOG_ERROR("Input nesting depth exceeds currently supported depth (3), use 1 level: [A, B], or 2 level: [A, (B, C)]");
- }
+void flatten_dfs(
+ std::vector<torch_tensorrt::core::ir::Input>& flattened_inputs,
+ std::vector<std::vector<torch_tensorrt::core::ir::Input>>& collection_inputs,
+ torch::jit::IValue input_ivalue,
+ int level,
+ int index) {
+ if (input_ivalue.isTuple()) {
+ auto input_tuple = input_ivalue.toTuple();
+ int idx = 0;
+ if (level == 0) {
+ collection_inputs.resize(input_tuple->elements().size());
}
+ for (auto item : input_tuple->elements()) {
+ torch::jit::IValue converted_item;
+ int cur_idx = level < 1 ? idx : index;
+ flatten_dfs(flattened_inputs, collection_inputs, item, level + 1, cur_idx);
+ idx++;
+ }
+ } else if (input_ivalue.isList()) {
+ auto input_list = input_ivalue.toList().vec();
+ if (level == 0) {
+ collection_inputs.resize(input_list.size());
+ }
+ c10::TypePtr type = input_list[0].type();
+ auto converted_elements = c10::impl::GenericList(type);
+ int idx = 0;
+ for (auto item : input_list) {
+ int cur_idx = level < 1 ? idx : index;
+ flatten_dfs(flattened_inputs, collection_inputs, item, level + 1, cur_idx);
+ idx++;
+ }
+ } else if (input_ivalue.isCustomClass()) {
+ torch_tensorrt::core::ir::Input cur_input = *(input_ivalue.toCustomClass<torch_tensorrt::core::ir::Input>());
+ flattened_inputs.push_back(cur_input);
+ if (level == 0) { // a single value like A
+ collection_inputs.resize(1);
+ collection_inputs[0].push_back(cur_input);
+ } else if (level == 1) { // like A in [A, A] or [(B, B), A]
+ collection_inputs[index].push_back(cur_input);
+ } else if (level == 2) { // like A in [(A, A), C]
+ collection_inputs[index].push_back(cur_input);
+ } else { // only support 2 level
+ LOG_ERROR(
+ "Input nesting depth exceeds currently supported depth (3), use 1 level: [A, B], or 2 level: [A, (B, C)]");
+ }
+ }
}
-
GraphInputs::GraphInputs(std::vector<ir::Input> inputs_) {
- LOG_DEBUG("Construct GraphInput with ir::Input");
- inputs = inputs_;
- collection_inputs.resize(inputs_.size());
- for (size_t i = 0; i < inputs_.size(); i++) {
- collection_inputs[i].push_back(inputs_[i]);
- }
+ LOG_DEBUG("Construct GraphInput with ir::Input");
+ inputs = inputs_;
+ collection_inputs.resize(inputs_.size());
+ for (size_t i = 0; i < inputs_.size(); i++) {
+ collection_inputs[i].push_back(inputs_[i]);
+ }
}
GraphInputs::GraphInputs(torch::jit::IValue& input_signature_) {
- LOG_DEBUG("Construct GraphInput with IValue");
+ LOG_DEBUG("Construct GraphInput with IValue");
- std::vector<torch_tensorrt::core::ir::Input> flattened_inputs;
- std::vector<std::vector<torch_tensorrt::core::ir::Input>> collection_inputs_;
+ std::vector<torch_tensorrt::core::ir::Input> flattened_inputs;
+ std::vector<std::vector<torch_tensorrt::core::ir::Input>> collection_inputs_;
- flatten_dfs(flattened_inputs, collection_inputs_, input_signature_, 0, 0);
- inputs = flattened_inputs;
- input_signature = input_signature_;
- collection_inputs = collection_inputs_;
- LOG_DEBUG("Collection Input Size: " << collection_inputs_.size());
+ flatten_dfs(flattened_inputs, collection_inputs_, input_signature_, 0, 0);
+ inputs = flattened_inputs;
+ input_signature = input_signature_;
+ collection_inputs = collection_inputs_;
+ LOG_DEBUG("Collection Input Size: " << collection_inputs_.size());
}
} // namespace ir
diff --git a/workspace/core/ir/StaticParams.cpp b/tmp/changes.txt
index 0073ad2..8502c80 100644
--- a/workspace/core/ir/StaticParams.cpp
+++ b/tmp/changes.txt
@@ -12,8 +12,7 @@ StaticParams get_static_params(c10::ArrayRef<torch::jit::Value*> inputs, std::ve
auto param_it = params.begin();
for (auto in : inputs) {
// handle TensorType, TupleType and ListType
- if (in->type() != c10::TensorType::get() &&
- in->type()->kind() != torch::jit::TypeKind::TupleType &&
+ if (in->type() != c10::TensorType::get() && in->type()->kind() != torch::jit::TypeKind::TupleType &&
in->type()->kind() != torch::jit::TypeKind::ListType && param_it != params.end()) {
static_params[in] = *param_it;
++param_it;
diff --git a/workspace/core/ir/ir.cpp b/tmp/changes.txt
index cc82fe0..d9b021e 100644
--- a/workspace/core/ir/ir.cpp
+++ b/tmp/changes.txt
@@ -35,7 +35,9 @@ InputSpecMap pair_input_vals_with_specs(std::vector<const torch::jit::Value*> va
return a;
}
-CollectionInputSpecMap pair_input_vals_with_specs_collection(std::vector<const torch::jit::Value*> vals, std::vector<std::vector<Input>>& specs) {
+CollectionInputSpecMap pair_input_vals_with_specs_collection(
+ std::vector<const torch::jit::Value*> vals,
+ std::vector<std::vector<Input>>& specs) {
TORCHTRT_CHECK(
vals.size() == specs.size(),
"Expected dimension specifications for all input tensors"
@@ -64,7 +66,7 @@ std::vector<const torch::jit::Value*> get_tensor_inputs(
// input.1:Tensor -> used
if (in->type()->isSubtypeOf(c10::TensorType::get()) && static_params.find(in) == static_params.end()) {
input_tensors.push_back(in);
- }
+ }
}
return input_tensors;
}
@@ -80,7 +82,8 @@ std::vector<const torch::jit::Value*> get_collection_inputs(
if (in->type()->isSubtypeOf(c10::TensorType::get()) && static_params.find(in) == static_params.end()) {
input_tensors.push_back(in);
} else if (in->type()->kind() == torch::jit::TypeKind::TupleType && static_params.find(in) == static_params.end()) {
- // } else if (in->type()->isSubtypeOf(c10::TupleType::create()) && static_params.find(in) == static_params.end()) {
+ // } else if (in->type()->isSubtypeOf(c10::TupleType::create()) && static_params.find(in) == static_params.end())
+ // {
input_tensors.push_back(in); // push original tuple
at::ArrayRef<torch::jit::Value*> unpack_tuple = torch::jit::createTupleUnpack(in);
LOG_DEBUG("get_collection_inputs, tuple size " << unpack_tuple.size());
@@ -190,15 +193,15 @@ TypeMap get_block_first_calc_dtypes_opt(torch::jit::Block* b) {
if (i->type() == c10::TensorType::get()) {
torch::jit::Value* in = i;
types.insert({in, get_value_first_calc_dtype_opt(b, i)});
- } else if(i->type()->cast<c10::TupleType>()) {
+ } else if (i->type()->cast<c10::TupleType>()) {
// make sure very time get the same ptr
at::ArrayRef<torch::jit::Value*> unpack_tuple = torch::jit::createTupleUnpack(i);
LOG_DEBUG("Tuple size " << unpack_tuple.size());
- for (auto item: unpack_tuple) {
+ for (auto item : unpack_tuple) {
torch::jit::Value* in = item;
types.insert({in, get_value_first_calc_dtype_opt(b, i)});
}
- } else if(i->type()->isSubtypeOf(c10::ListType::ofTensors())) {
+ } else if (i->type()->isSubtypeOf(c10::ListType::ofTensors())) {
LOG_INFO("Unsupported type of c10::ListType::ofTensors()");
}
}
@@ -212,7 +215,7 @@ CollectionTypeMap get_block_first_calc_dtypes_opt_collection(torch::jit::Block*
torch::jit::Value* in = i;
types.insert({in, {get_value_first_calc_dtype_opt(b, i)}});
- } else if(i->type()->kind() == torch::jit::TypeKind::TupleType) {
+ } else if (i->type()->kind() == torch::jit::TypeKind::TupleType) {
// TODO: to evaluate the data type of tuple element
// make sure very time get the same ptr
// c10::optional<at::ScalarType> tp = get_value_first_calc_dtype_opt(b, i);
@@ -220,9 +223,9 @@ CollectionTypeMap get_block_first_calc_dtypes_opt_collection(torch::jit::Block*
// TODO: calculate the tuple element type, currently we use {} as default datatype
// std::vector<c10::optional<at::ScalarType>> dytpes(unpack_tuple.size(), tp);
std::vector<c10::optional<at::ScalarType>> dytpes(unpack_tuple.size());
- types.insert({i, dytpes}); // insert an empty
+ types.insert({i, dytpes}); // insert an empty
- } else if(i->type()->kind() == torch::jit::TypeKind::ListType) {
+ } else if (i->type()->kind() == torch::jit::TypeKind::ListType) {
// TODO: to decide the size of list and type of list element
LOG_DEBUG("get_block_first_calc_dtypes_opt ListType: use size " << i->uses().size());
c10::optional<at::ScalarType> tp = get_value_first_calc_dtype_opt(b, i);
@@ -234,8 +237,7 @@ CollectionTypeMap get_block_first_calc_dtypes_opt_collection(torch::jit::Block*
return types;
}
-static auto core_input_container =
- torch::class_<Input>("_torch_tensorrt_core_ir", "Input").def(torch::init<>());
+static auto core_input_container = torch::class_<Input>("_torch_tensorrt_core_ir", "Input").def(torch::init<>());
} // namespace ir
} // namespace core
diff --git a/workspace/core/conversion/converters/converter_util.h b/tmp/changes.txt
index cdf2ee5..b155499 100644
--- a/workspace/core/conversion/converters/converter_util.h
+++ b/tmp/changes.txt
@@ -1,8 +1,8 @@
#pragma once
+#include <limits>
#include <map>
#include <string>
-#include <limits>
#include "core/conversion/conversionctx/ConversionCtx.h"
#include "core/conversion/converters/Weights.h"
diff --git a/workspace/core/partitioning/shape_analysis.h b/tmp/changes.txt
index 2654699..e9c51fc 100644
--- a/workspace/core/partitioning/shape_analysis.h
+++ b/tmp/changes.txt
@@ -6,7 +6,6 @@ namespace torch_tensorrt {
namespace core {
namespace partitioning {
-
std::unordered_map<const torch::jit::Value*, torch::jit::IValue> generateRandomInputs(
std::unordered_map<const torch::jit::Value*, std::vector<ir::Input>>& input_ranges,
std::unordered_map<const torch::jit::Value*, std::vector<c10::optional<at::ScalarType>>>& input_types);
diff --git a/workspace/core/ir/ir.h b/tmp/changes.txt
index 966c747..a5225da 100644
--- a/workspace/core/ir/ir.h
+++ b/tmp/changes.txt
@@ -12,7 +12,7 @@ namespace core {
namespace ir {
struct Input : torch::CustomClassHolder {
- Input() {};
+ Input(){};
Input(
std::vector<int64_t> shape,
nvinfer1::DataType dtype = nvinfer1::DataType::kFLOAT,
@@ -42,8 +42,8 @@ struct Input : torch::CustomClassHolder {
struct GraphInputs {
GraphInputs(std::vector<ir::Input> inputs);
GraphInputs(torch::jit::IValue& input_signature);
- torch::jit::IValue input_signature; // nested Input, full input spec
- std::vector<Input> inputs; // flattend Input
+ torch::jit::IValue input_signature; // nested Input, full input spec
+ std::vector<Input> inputs; // flattend Input
std::vector<std::vector<Input>> collection_inputs; // only support two layer nesting, e.g. ((a, b), [c, d], e)
};
@@ -67,7 +67,9 @@ CollectionInputSpecMap associate_specs_with_collection_inputs(
ir::GraphInputs graph_inputs,
StaticParams& static_params);
InputSpecMap pair_input_vals_with_specs(std::vector<const torch::jit::Value*> vals, std::vector<Input> specs);
-CollectionInputSpecMap pair_input_vals_with_specs_collection(std::vector<const torch::jit::Value*> vals, std::vector<std::vector<Input>>& specs);
+CollectionInputSpecMap pair_input_vals_with_specs_collection(
+ std::vector<const torch::jit::Value*> vals,
+ std::vector<std::vector<Input>>& specs);
std::vector<const torch::jit::Value*> get_tensor_inputs(
std::shared_ptr<torch::jit::Graph>& g,
StaticParams& static_params);
diff --git a/workspace/tests/core/conversion/converters/test_cast.cpp b/tmp/changes.txt
index 092cdb3..d26c7a0 100644
--- a/workspace/tests/core/conversion/converters/test_cast.cpp
+++ b/tmp/changes.txt
@@ -135,7 +135,6 @@ TEST(Converters, ATenBoolToINT32TensorConvertsCorrectly) {
ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt, 2e-6));
}
-
TEST(Converters, ATenToSingleConvertsCorrectly) {
const auto graph = R"IR(
graph(%y.1 : Tensor):
@@ -164,7 +163,6 @@ TEST(Converters, ATenToSingleConvertsCorrectly) {
ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt, 2e-6));
}
-
TEST(Converters, ATenTypeAsConvertsCorrectly) {
const auto graph = R"IR(
graph(%0 : Tensor,
diff --git a/workspace/tests/cpp/test_example_tensors.cpp b/tmp/changes.txt
index 3ec8831..256e6f1 100644
--- a/workspace/tests/cpp/test_example_tensors.cpp
+++ b/tmp/changes.txt
@@ -9,7 +9,6 @@ TEST_P(CppAPITests, InputsFromTensors) {
trt_inputs_ivalues.push_back(in.clone());
}
-
auto inputs = std::vector<torch_tensorrt::Input>{trt_inputs_ivalues[0].toTensor()};
auto spec = torch_tensorrt::ts::CompileSpec(inputs);
diff --git a/workspace/tests/cpp/test_collections.cpp b/tmp/changes.txt
index df2280b..829e82a 100644
--- a/workspace/tests/cpp/test_collections.cpp
+++ b/tmp/changes.txt
@@ -5,9 +5,7 @@
#include "torch/script.h"
#include "torch_tensorrt/torch_tensorrt.h"
-
TEST(CppAPITests, TestCollectionStandardTensorInput) {
-
std::string path = "tests/modules/standard_tensor_input_scripted.jit.pt";
torch::Tensor in0 = torch::randn({1, 3, 512, 512}, torch::kCUDA).to(torch::kHalf);
std::vector<at::Tensor> inputs;
@@ -24,7 +22,6 @@ TEST(CppAPITests, TestCollectionStandardTensorInput) {
mod.eval();
mod.to(torch::kCUDA);
-
std::vector<torch::jit::IValue> inputs_;
for (auto in : inputs) {
@@ -52,7 +49,6 @@ TEST(CppAPITests, TestCollectionStandardTensorInput) {
}
TEST(CppAPITests, TestCollectionTupleInput) {
-
std::string path = "tests/modules/tuple_input_scripted.jit.pt";
torch::Tensor in0 = torch::randn({1, 3, 512, 512}, torch::kCUDA).to(torch::kHalf);
@@ -78,14 +74,12 @@ TEST(CppAPITests, TestCollectionTupleInput) {
auto input_shape_ivalue = torch::jit::IValue(std::move(c10::make_intrusive<torch_tensorrt::Input>(input_shape)));
-
std::tuple<torch::jit::IValue, torch::jit::IValue> input_shape_tuple(input_shape_ivalue, input_shape_ivalue);
torch::jit::IValue complex_input_shape(input_shape_tuple);
std::tuple<torch::jit::IValue> input_tuple2(complex_input_shape);
torch::jit::IValue complex_input_shape2(input_tuple2);
-
auto compile_settings = torch_tensorrt::ts::CompileSpec(complex_input_shape2);
compile_settings.require_full_compilation = false;
compile_settings.min_block_size = 3;
@@ -100,9 +94,7 @@ TEST(CppAPITests, TestCollectionTupleInput) {
ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(out.toTensor(), trt_out.toTensor(), 1e-5));
}
-
TEST(CppAPITests, TestCollectionListInput) {
-
std::string path = "tests/modules/list_input_scripted.jit.pt";
torch::Tensor in0 = torch::randn({1, 3, 512, 512}, torch::kCUDA).to(torch::kHalf);
std::vector<at::Tensor> inputs;
@@ -118,7 +110,6 @@ TEST(CppAPITests, TestCollectionListInput) {
mod.eval();
mod.to(torch::kCUDA);
-
std::vector<torch::jit::IValue> inputs_;
for (auto in : inputs) {
@@ -134,7 +125,6 @@ TEST(CppAPITests, TestCollectionListInput) {
complex_inputs.push_back(input_list_ivalue);
-
auto out = mod.forward(complex_inputs);
LOG_DEBUG("Finish torchscirpt forward");
@@ -146,7 +136,6 @@ TEST(CppAPITests, TestCollectionListInput) {
list.push_back(input_shape_ivalue);
list.push_back(input_shape_ivalue);
-
torch::jit::IValue complex_input_shape(list);
std::tuple<torch::jit::IValue> input_tuple2(complex_input_shape);
torch::jit::IValue complex_input_shape2(input_tuple2);
@@ -166,9 +155,7 @@ TEST(CppAPITests, TestCollectionListInput) {
ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(out.toTensor(), trt_out.toTensor(), 1e-5));
}
-
TEST(CppAPITests, TestCollectionTupleInputOutput) {
-
std::string path = "tests/modules/tuple_input_output_scripted.jit.pt";
torch::Tensor in0 = torch::randn({1, 3, 512, 512}, torch::kCUDA).to(torch::kHalf);
@@ -183,7 +170,6 @@ TEST(CppAPITests, TestCollectionTupleInputOutput) {
mod.eval();
mod.to(torch::kCUDA);
-
std::vector<torch::jit::IValue> complex_inputs, complex_inputs_list;
std::tuple<torch::jit::IValue, torch::jit::IValue> input_tuple(in0, in0);
@@ -196,7 +182,6 @@ TEST(CppAPITests, TestCollectionTupleInputOutput) {
auto input_shape_ivalue = torch::jit::IValue(std::move(c10::make_intrusive<torch_tensorrt::Input>(input_shape)));
-
std::tuple<torch::jit::IValue, torch::jit::IValue> input_shape_tuple(input_shape_ivalue, input_shape_ivalue);
torch::jit::IValue complex_input_shape(input_shape_tuple);
@@ -217,13 +202,13 @@ TEST(CppAPITests, TestCollectionTupleInputOutput) {
LOG_DEBUG("Finish compile");
auto trt_out = trt_mod.forward(complex_inputs);
- ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(out.toTuple()->elements()[0].toTensor(), trt_out.toTuple()->elements()[0].toTensor(), 1e-5));
- ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(out.toTuple()->elements()[1].toTensor(), trt_out.toTuple()->elements()[1].toTensor(), 1e-5));
+ ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(
+ out.toTuple()->elements()[0].toTensor(), trt_out.toTuple()->elements()[0].toTensor(), 1e-5));
+ ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(
+ out.toTuple()->elements()[1].toTensor(), trt_out.toTuple()->elements()[1].toTensor(), 1e-5));
}
-
TEST(CppAPITests, TestCollectionListInputOutput) {
-
std::string path = "tests/modules/list_input_output_scripted.jit.pt";
torch::Tensor in0 = torch::randn({1, 3, 512, 512}, torch::kCUDA).to(torch::kHalf);
std::vector<at::Tensor> inputs;
@@ -239,7 +224,6 @@ TEST(CppAPITests, TestCollectionListInputOutput) {
mod.eval();
mod.to(torch::kCUDA);
-
std::vector<torch::jit::IValue> inputs_;
for (auto in : inputs) {
@@ -255,7 +239,6 @@ TEST(CppAPITests, TestCollectionListInputOutput) {
complex_inputs.push_back(input_list_ivalue);
-
auto out = mod.forward(complex_inputs);
LOG_DEBUG("Finish torchscirpt forward");
@@ -263,13 +246,11 @@ TEST(CppAPITests, TestCollectionListInputOutput) {
auto input_shape_ivalue = torch::jit::IValue(std::move(c10::make_intrusive<torch_tensorrt::Input>(input_shape)));
-
c10::TypePtr elementType = input_shape_ivalue.type();
auto list = c10::impl::GenericList(elementType);
list.push_back(input_shape_ivalue);
list.push_back(input_shape_ivalue);
-
torch::jit::IValue complex_input_shape(list);
std::tuple<torch::jit::IValue> input_tuple2(complex_input_shape);
torch::jit::IValue complex_input_shape2(input_tuple2);
@@ -288,13 +269,13 @@ TEST(CppAPITests, TestCollectionListInputOutput) {
LOG_DEBUG("Finish compile");
auto trt_out = trt_mod.forward(complex_inputs);
- ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(out.toList().vec()[0].toTensor(), trt_out.toList().vec()[0].toTensor(), 1e-5));
- ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(out.toList().vec()[1].toTensor(), trt_out.toList().vec()[1].toTensor(), 1e-5));
+ ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(
+ out.toList().vec()[0].toTensor(), trt_out.toList().vec()[0].toTensor(), 1e-5));
+ ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(
+ out.toList().vec()[1].toTensor(), trt_out.toList().vec()[1].toTensor(), 1e-5));
}
-
TEST(CppAPITests, TestCollectionComplexModel) {
-
std::string path = "tests/modules/list_input_tuple_output_scripted.jit.pt";
torch::Tensor in0 = torch::randn({1, 3, 512, 512}, torch::kCUDA).to(torch::kHalf);
std::vector<at::Tensor> inputs;
@@ -310,7 +291,6 @@ TEST(CppAPITests, TestCollectionComplexModel) {
mod.eval();
mod.to(torch::kCUDA);
-
std::vector<torch::jit::IValue> inputs_;
for (auto in : inputs) {
@@ -326,7 +306,6 @@ TEST(CppAPITests, TestCollectionComplexModel) {
complex_inputs.push_back(input_list_ivalue);
-
auto out = mod.forward(complex_inputs);
LOG_DEBUG("Finish torchscirpt forward");
@@ -339,7 +318,6 @@ TEST(CppAPITests, TestCollectionComplexModel) {
list.push_back(input_shape_ivalue);
list.push_back(input_shape_ivalue);
-
torch::jit::IValue complex_input_shape(list);
std::tuple<torch::jit::IValue> input_tuple2(complex_input_shape);
torch::jit::IValue complex_input_shape2(input_tuple2);
@@ -358,6 +336,8 @@ TEST(CppAPITests, TestCollectionComplexModel) {
LOG_DEBUG("Finish compile");
auto trt_out = trt_mod.forward(complex_inputs);
- ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(out.toTuple()->elements()[0].toTensor(), trt_out.toTuple()->elements()[0].toTensor(), 1e-5));
- ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(out.toTuple()->elements()[1].toTensor(), trt_out.toTuple()->elements()[1].toTensor(), 1e-5));
+ ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(
+ out.toTuple()->elements()[0].toTensor(), trt_out.toTuple()->elements()[0].toTensor(), 1e-5));
+ ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(
+ out.toTuple()->elements()[1].toTensor(), trt_out.toTuple()->elements()[1].toTensor(), 1e-5));
}
\ No newline at end of file
diff --git a/workspace/cpp/bin/torchtrtc/main.cpp b/tmp/changes.txt
index 6c207d7..51ec2c5 100644
--- a/workspace/cpp/bin/torchtrtc/main.cpp
+++ b/tmp/changes.txt
@@ -117,8 +117,7 @@ int main(int argc, char** argv) {
parser, "num_iters", "Number of averaging timing iterations used to select kernels", {"num-avg-timing-iters"});
args::ValueFlag<uint64_t> workspace_size(
parser, "workspace_size", "Maximum size of workspace given to TensorRT", {"workspace-size"});
- args::ValueFlag<uint64_t> dla_sram_size(
- parser, "dla_sram_size", "DLA managed SRAM size", {"dla-sram-size"});
+ args::ValueFlag<uint64_t> dla_sram_size(parser, "dla_sram_size", "DLA managed SRAM size", {"dla-sram-size"});
args::ValueFlag<uint64_t> dla_local_dram_size(
parser, "dla_local_dram_size", "DLA Local DRAM size", {"dla-local-dram-size"});
args::ValueFlag<uint64_t> dla_global_dram_size(
diff --git a/workspace/cpp/src/compile_spec.cpp b/tmp/changes.txt
index 1fb4c56..432b070 100644
--- a/workspace/cpp/src/compile_spec.cpp
+++ b/tmp/changes.txt
@@ -29,40 +29,38 @@ CompileSpec::CompileSpec(std::vector<std::vector<int64_t>> fixed_sizes) {
}
CompileSpec::CompileSpec(std::vector<Input> inputs) {
- graph_inputs.inputs = std::move(inputs);
+ graph_inputs.inputs = std::move(inputs);
}
CompileSpec::CompileSpec(torch::jit::IValue input_signature) {
- graph_inputs.input_signature = input_signature;
+ graph_inputs.input_signature = input_signature;
}
-
-
void to_internal_input_signature(torch::jit::IValue input_ivalue, torch::jit::IValue& converted_ivalue) {
- if (input_ivalue.isTuple()) {
- auto input_tuple = input_ivalue.toTuple();
- std::vector<torch::jit::IValue> converted_elements;
- for (auto item: input_tuple->elements()) {
- torch::jit::IValue converted_item;
- to_internal_input_signature(item, converted_item);
- converted_elements.push_back(converted_item);
- auto tuple_ptr = c10::ivalue::Tuple::create(converted_elements);
- converted_ivalue = torch::jit::IValue(tuple_ptr);
- }
- } else if(input_ivalue.isList()) {
- auto input_list = input_ivalue.toList().vec();
- c10::TypePtr type = input_list[0].type();
- auto converted_elements = c10::impl::GenericList(type);
- for (auto item: input_list) {
- torch::jit::IValue converted_item;
- to_internal_input_signature(item, converted_item);
- converted_elements.push_back(converted_item);
- }
- converted_ivalue = torch::jit::IValue(converted_elements);
- } else if(input_ivalue.isCustomClass()) {
- torchtrt::core::ir::Input cur_input = to_internal_input(*(input_ivalue.toCustomClass<torchtrt::Input>()));
- converted_ivalue = torch::jit::IValue(std::move(c10::make_intrusive<torch_tensorrt::core::ir::Input>(cur_input)));
+ if (input_ivalue.isTuple()) {
+ auto input_tuple = input_ivalue.toTuple();
+ std::vector<torch::jit::IValue> converted_elements;
+ for (auto item : input_tuple->elements()) {
+ torch::jit::IValue converted_item;
+ to_internal_input_signature(item, converted_item);
+ converted_elements.push_back(converted_item);
+ auto tuple_ptr = c10::ivalue::Tuple::create(converted_elements);
+ converted_ivalue = torch::jit::IValue(tuple_ptr);
}
+ } else if (input_ivalue.isList()) {
+ auto input_list = input_ivalue.toList().vec();
+ c10::TypePtr type = input_list[0].type();
+ auto converted_elements = c10::impl::GenericList(type);
+ for (auto item : input_list) {
+ torch::jit::IValue converted_item;
+ to_internal_input_signature(item, converted_item);
+ converted_elements.push_back(converted_item);
+ }
+ converted_ivalue = torch::jit::IValue(converted_elements);
+ } else if (input_ivalue.isCustomClass()) {
+ torchtrt::core::ir::Input cur_input = to_internal_input(*(input_ivalue.toCustomClass<torchtrt::Input>()));
+ converted_ivalue = torch::jit::IValue(std::move(c10::make_intrusive<torch_tensorrt::core::ir::Input>(cur_input)));
+ }
}
torchtrt::core::CompileSpec init_compile_spec(CompileSpec external) {
diff --git a/workspace/cpp/src/torch_tensorrt.cpp b/tmp/changes.txt
index 9381319..22855ae 100644
--- a/workspace/cpp/src/torch_tensorrt.cpp
+++ b/tmp/changes.txt
@@ -53,6 +53,5 @@ void set_device(const int gpu_id) {
torch_tensorrt::core::set_device(gpu_id);
}
-static auto tensorrt_input_container =
- torch::class_<Input>("_torch_tensorrt", "Input").def(torch::init<>());
+static auto tensorrt_input_container = torch::class_<Input>("_torch_tensorrt", "Input").def(torch::init<>());
} // namespace torch_tensorrt
diff --git a/workspace/cpp/include/torch_tensorrt/torch_tensorrt.h b/tmp/changes.txt
index 11dc5d7..6a7035e 100644
--- a/workspace/cpp/include/torch_tensorrt/torch_tensorrt.h
+++ b/tmp/changes.txt
@@ -364,7 +364,7 @@ class TORCHTRT_API TensorFormat {
* signifying a static input shape or a set of three input shapes representing
* the min, optiminal and max input shapes allowed for the engine.
*/
-struct TORCHTRT_API Input : torch::CustomClassHolder{
+struct TORCHTRT_API Input : torch::CustomClassHolder {
/// Minimum acceptable input size into the engine
std::vector<int64_t> min_shape;
/// Optimal input size into the engine (size optimized for given kernels accept any size in min max range)
@@ -520,7 +520,7 @@ struct TORCHTRT_API Input : torch::CustomClassHolder{
* This struct can either hold a complex inputs of shape or a flattened one,
*/
struct TORCHTRT_API GraphInputs {
- torch::jit::IValue input_signature; // nested Input, full input spec
+ torch::jit::IValue input_signature; // nested Input, full input spec
std::vector<Input> inputs; // flatten input spec
};
ERROR: Some files do not conform to style guidelines
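
For reference, the style changes in the hunks above all come down to a few recurring formatting rules (most likely enforced by the repository's clang-format configuration, though the bot output does not name the tool): a space on both sides of the colon in range-based for loops, a space after "else if", long call arguments wrapped onto indented continuation lines, and consistent spacing before trailing comments. A minimal sketch of those conventions on a toy function (the function and variable names below are illustrative only, not taken from the patch):

#include <vector>

// Toy example showing the spacing the formatter expects.
int sum_positive(const std::vector<int>& values) {
  int total = 0;
  for (auto v : values) { // space on both sides of ':' in range-for
    if (v > 0) {
      total += v;
    } else if (v == 0) { // "} else if (" with spaces, on one line
      continue;
    }
  }
  return total;
}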
There are some changes that do not conform to C++ style guidelines:
diff --git a/workspace/py/torch_tensorrt/csrc/torch_tensorrt_py.cpp b/tmp/changes.txt
index 6247789..6b1ffd4 100644
--- a/workspace/py/torch_tensorrt/csrc/torch_tensorrt_py.cpp
+++ b/tmp/changes.txt
@@ -1,8 +1,8 @@
#include "pybind11/pybind11.h"
#include "pybind11/stl.h"
-#include "Python.h"
#include "ATen/core/jit_type.h"
+#include "Python.h"
#include "core/compiler.h"
#include "core/conversion/conversion.h"
#include "tensorrt_classes.h"
@@ -182,7 +182,8 @@ PYBIND11_MODULE(_C, m) {
py::class_<InputSignature>(m, "InputSignature")
.def(pybind11::init([](py::object py_obj) {
InputSignature input_signature;
- input_signature.signature_ivalue = torch::jit::toIValue(std::move(py_obj), c10::PyObjectType::get(), c10::nullopt);
+ input_signature.signature_ivalue =
+ torch::jit::toIValue(std::move(py_obj), c10::PyObjectType::get(), c10::nullopt);
return input_signature;
}))
.def("__str__", &InputSignature::to_str)
diff --git a/workspace/py/torch_tensorrt/csrc/tensorrt_classes.cpp b/tmp/changes.txt
index ca11cf4..96fef79 100644
--- a/workspace/py/torch_tensorrt/csrc/tensorrt_classes.cpp
+++ b/tmp/changes.txt
@@ -108,35 +108,35 @@ std::string sig_to_str(torch::jit::IValue input_sig) {
if (input_sig.isTuple()) {
auto input_tuple = input_sig.toTuple();
std::vector<std::string> children;
- for (auto item: input_tuple->elements()) {
+ for (auto item : input_tuple->elements()) {
auto child = sig_to_str(item);
children.push_back(child);
}
std::stringstream ss;
ss << "(";
for (auto i : children) {
- ss << i << ", ";
+ ss << i << ", ";
}
ss << ")";
return ss.str();
- } else if(input_sig.isList()) {
+ } else if (input_sig.isList()) {
auto input_list = input_sig.toList().vec();
std::vector<std::string> children;
- for (auto item: input_list) {
+ for (auto item : input_list) {
auto child = sig_to_str(item);
children.push_back(child);
}
std::stringstream ss;
ss << "[";
for (auto i : children) {
- ss << i << ", ";
+ ss << i << ", ";
}
ss << "]";
return ss.str();
- } else if(input_sig.isCustomClass()) {
+ } else if (input_sig.isCustomClass()) {
auto cur_input = input_sig.toCustomClass<Input>();
return cur_input->to_str();
- } else if(input_sig.isPyObject()) {
+ } else if (input_sig.isPyObject()) {
auto py_object_holder = input_sig.toPyObjectHolder();
auto infer_type = py_object_holder->tryToInferType();
auto type = infer_type.type();
@@ -238,27 +238,27 @@ void to_internal_input_signature(torch::jit::IValue input_ivalue, torch::jit::IV
if (input_ivalue.isTuple()) {
auto input_tuple = input_ivalue.toTuple();
std::vector<torch::jit::IValue> converted_elements;
- for (auto item: input_tuple->elements()) {
+ for (auto item : input_tuple->elements()) {
torch::jit::IValue converted_item;
to_internal_input_signature(item, converted_item);
converted_elements.push_back(converted_item);
auto tuple_ptr = c10::ivalue::Tuple::create(converted_elements);
converted_ivalue = torch::jit::IValue(tuple_ptr);
}
- } else if(input_ivalue.isList()) {
+ } else if (input_ivalue.isList()) {
auto input_list = input_ivalue.toList().vec();
c10::TypePtr type = input_list[0].type();
auto converted_elements = c10::impl::GenericList(type);
- for (auto item: input_list) {
+ for (auto item : input_list) {
torch::jit::IValue converted_item;
to_internal_input_signature(item, converted_item);
converted_elements.push_back(converted_item);
}
converted_ivalue = torch::jit::IValue(converted_elements);
- } else if(input_ivalue.isCustomClass()) {
+ } else if (input_ivalue.isCustomClass()) {
core::ir::Input cur_input = (*(input_ivalue.toCustomClass<Input>())).toInternalInput();
converted_ivalue = torch::jit::IValue(std::move(c10::make_intrusive<core::ir::Input>(cur_input)));
- } else if(input_ivalue.isPyObject()) {
+ } else if (input_ivalue.isPyObject()) {
auto py_object_holder = input_ivalue.toPyObjectHolder();
auto infer_type = py_object_holder->tryToInferType();
auto type = infer_type.type();
@@ -325,11 +325,17 @@ core::CompileSpec CompileSpec::toInternalCompileSpec() {
info.convert_info.engine_settings.num_avg_timing_iters = num_avg_timing_iters;
TORCHTRT_CHECK(workspace_size >= 0, "workspace_size must be 0 or greater");
info.convert_info.engine_settings.workspace_size = workspace_size;
- TORCHTRT_CHECK(dla_sram_size >= 4096, "DLA managed SRAM size must be at least 4 KiB and must be a power of 2. This defaults to 1 MiB");
+ TORCHTRT_CHECK(
+ dla_sram_size >= 4096,
+ "DLA managed SRAM size must be at least 4 KiB and must be a power of 2. This defaults to 1 MiB");
info.convert_info.engine_settings.dla_sram_size = dla_sram_size;
- TORCHTRT_CHECK(dla_local_dram_size >= 4096, "DLA Local DRAM size must be at least 4 KiB and must be a power of 2. This defaults to 1 GiB");
+ TORCHTRT_CHECK(
+ dla_local_dram_size >= 4096,
+ "DLA Local DRAM size must be at least 4 KiB and must be a power of 2. This defaults to 1 GiB");
info.convert_info.engine_settings.dla_local_dram_size = dla_local_dram_size;
- TORCHTRT_CHECK(dla_global_dram_size >= 4096, "DLA Global DRAM size must be at least 4 KiB and must be a power of 2. This defaults to 512 MiB");
+ TORCHTRT_CHECK(
+ dla_global_dram_size >= 4096,
+ "DLA Global DRAM size must be at least 4 KiB and must be a power of 2. This defaults to 512 MiB");
info.convert_info.engine_settings.dla_global_dram_size = dla_global_dram_size;
return info;
}
@@ -348,7 +354,7 @@ std::string CompileSpec::stringify() {
}
ss << " \"Enabled Precision\": [";
for (auto p : enabled_precisions) {
- ss << to_str(p) << ", " ;
+ ss << to_str(p) << ", ";
}
ss << "]" << std::endl;
ss << " \"TF32 Disabled\": " << disable_tf32 << std::endl;
diff --git a/workspace/py/torch_tensorrt/csrc/register_tensorrt_classes.cpp b/tmp/changes.txt
index 0eb6fba..274b40d 100644
--- a/workspace/py/torch_tensorrt/csrc/register_tensorrt_classes.cpp
+++ b/tmp/changes.txt
@@ -28,7 +28,8 @@ void RegisterTRTCompileSpec() {
.def(torch::init<>())
.def("__str__", &torch_tensorrt::pyapi::InputSignature::to_str);
- ADD_FIELD_GET_SET_REGISTRATION(TRTInputSignatureTSRegistration, torch_tensorrt::pyapi::InputSignature, signature_ivalue);
+ ADD_FIELD_GET_SET_REGISTRATION(
+ TRTInputSignatureTSRegistration, torch_tensorrt::pyapi::InputSignature, signature_ivalue);
static auto TORCHTRT_UNUSED TRTDeviceTSRegistration =
torch::class_<torch_tensorrt::pyapi::Device>("tensorrt", "_Device")
@@ -73,7 +74,8 @@ void RegisterTRTCompileSpec() {
ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistration, torch_tensorrt::pyapi::CompileSpec, workspace_size);
ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistration, torch_tensorrt::pyapi::CompileSpec, dla_sram_size);
ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistration, torch_tensorrt::pyapi::CompileSpec, dla_local_dram_size);
- ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistration, torch_tensorrt::pyapi::CompileSpec, dla_global_dram_size);
+ ADD_FIELD_GET_SET_REGISTRATION(
+ TRTCompileSpecTSRegistration, torch_tensorrt::pyapi::CompileSpec, dla_global_dram_size);
ADD_FIELD_GET_SET_REGISTRATION(
TRTCompileSpecTSRegistration, torch_tensorrt::pyapi::CompileSpec, truncate_long_and_double);
}
diff --git a/workspace/py/torch_tensorrt/csrc/tensorrt_classes.h b/tmp/changes.txt
index d3b2274..be2fab3 100644
--- a/workspace/py/torch_tensorrt/csrc/tensorrt_classes.h
+++ b/tmp/changes.txt
@@ -58,7 +58,7 @@ struct Input : torch::CustomClassHolder {
};
struct InputSignature : torch::CustomClassHolder {
- torch::jit::IValue signature_ivalue; // nested Input, full input spec
+ torch::jit::IValue signature_ivalue; // nested Input, full input spec
ADD_FIELD_GET_SET(signature_ivalue, torch::jit::IValue);
std::string to_str();
};
diff --git a/workspace/core/compiler.cpp b/tmp/changes.txt
index e44ece5..caee900 100644
--- a/workspace/core/compiler.cpp
+++ b/tmp/changes.txt
@@ -308,70 +308,78 @@ void MapInputsAndDetermineDTypes(
std::shared_ptr<torch::jit::Graph>& g,
ir::StaticParams& static_params,
ir::CollectionTypeMap& first_use_type_map) {
- cfg.convert_info.collection_input_spec_map = std::move(ir::associate_specs_with_collection_inputs(g, cfg.graph_inputs, static_params));
+ cfg.convert_info.collection_input_spec_map =
+ std::move(ir::associate_specs_with_collection_inputs(g, cfg.graph_inputs, static_params));
- auto collection_inputs = ir::get_collection_inputs(g, static_params);
- LOG_DEBUG("In MapInputsAndDetermineDTypes, the g->inputs() size is " << g->inputs().size() << ", CollectionInputSpecMap size is" << collection_inputs.size());
+ auto collection_inputs = ir::get_collection_inputs(g, static_params);
+ LOG_DEBUG(
+ "In MapInputsAndDetermineDTypes, the g->inputs() size is "
+ << g->inputs().size() << ", CollectionInputSpecMap size is" << collection_inputs.size());
- for (auto in : collection_inputs) {
- std::vector<ir::Input>& spec = cfg.convert_info.collection_input_spec_map.find(in)->second;
- std::vector<c10::optional<at::ScalarType>> est_type_opt;
+ for (auto in : collection_inputs) {
+ std::vector<ir::Input>& spec = cfg.convert_info.collection_input_spec_map.find(in)->second;
+ std::vector<c10::optional<at::ScalarType>> est_type_opt;
- auto est_it = first_use_type_map.find(in);
- if (est_it != first_use_type_map.end()) {
- est_type_opt = first_use_type_map.find(in)->second;
- }
- // traverse elements in est_type_out and spec
- for (size_t i = 0; i < est_type_opt.size(); i++) {
- if (est_type_opt[i] && !spec[i].dtype_is_user_defined) {
- // If we can calculate the type from the graph and the type was not defined by the user then use the calculated
- // type
- LOG_INFO(
- "Since input type is not explicitly defined, infering using first tensor calculation\n Inferred input "
- << in->debugName() << " has type " << est_type_opt[i].value());
- spec[i].dtype = util::ScalarTypeToTRTDataType(est_type_opt[i].value());
- } else if (!est_type_opt[i] && !spec[i].dtype_is_user_defined) {
- // If we cannot calculate the type and the user did not define the type, then default to FP32
- LOG_WARNING(
- "Cannot infer input type from calcuations in graph for input "
- << in->debugName() << ". Assuming it is Float32. If not, specify input type explicity");
- spec[i].dtype = nvinfer1::DataType::kFLOAT;
- } else if (spec[i].dtype_is_user_defined && cfg.partition_info.enabled) {
- if (!est_type_opt[i]) {
- LOG_INFO("Cannot infer input tensor dtype in graph, compiler is going to use the user setting");
+ auto est_it = first_use_type_map.find(in);
+ if (est_it != first_use_type_map.end()) {
+ est_type_opt = first_use_type_map.find(in)->second;
+ }
+ // traverse elements in est_type_out and spec
+ for (size_t i = 0; i < est_type_opt.size(); i++) {
+ if (est_type_opt[i] && !spec[i].dtype_is_user_defined) {
+ // If we can calculate the type from the graph and the type was not defined by the user then use the calculated
+ // type
+ LOG_INFO(
+ "Since input type is not explicitly defined, infering using first tensor calculation\n Inferred input "
+ << in->debugName() << " has type " << est_type_opt[i].value());
+ spec[i].dtype = util::ScalarTypeToTRTDataType(est_type_opt[i].value());
+ } else if (!est_type_opt[i] && !spec[i].dtype_is_user_defined) {
+ // If we cannot calculate the type and the user did not define the type, then default to FP32
+ LOG_WARNING(
+ "Cannot infer input type from calcuations in graph for input "
+ << in->debugName() << ". Assuming it is Float32. If not, specify input type explicity");
+ spec[i].dtype = nvinfer1::DataType::kFLOAT;
+ } else if (spec[i].dtype_is_user_defined && cfg.partition_info.enabled) {
+ if (!est_type_opt[i]) {
+ LOG_INFO("Cannot infer input tensor dtype in graph, compiler is going to use the user setting");
+ std::stringstream ss;
+ ss << "For input " << in->debugName() << ", found user specified input dtype as ";
+ ss << cfg.convert_info.collection_input_spec_map.find(in)->second[i].dtype;
+ ss << ". The compiler is going to use the user setting "
+ << cfg.convert_info.collection_input_spec_map.find(in)->second[i].dtype;
+ auto warn_str = ss.str();
+ LOG_WARNING(warn_str);
+ // Overwrite type map with user settings
+ first_use_type_map[in][i] = {
+ util::TRTDataTypeToScalarType(cfg.convert_info.collection_input_spec_map.find(in)->second[i].dtype)};
+
+ } else {
+ if (util::TRTDataTypeToScalarType(cfg.convert_info.collection_input_spec_map.find(in)->second[i].dtype) !=
+ est_type_opt[i].value()) {
std::stringstream ss;
ss << "For input " << in->debugName() << ", found user specified input dtype as ";
ss << cfg.convert_info.collection_input_spec_map.find(in)->second[i].dtype;
- ss << ". The compiler is going to use the user setting " << cfg.convert_info.collection_input_spec_map.find(in)->second[i].dtype;
+ ss << ", however when inspecting the graph, the input type expected was inferred to be ";
+ ss << est_type_opt[i].value() << std::endl;
+ ss << "The compiler is going to use the user setting "
+ << cfg.convert_info.collection_input_spec_map.find(in)->second[i].dtype;
+ ss << "\nThis conflict may cause an error at runtime due to partial compilation being enabled and therefore\n";
+ ss << "compatibility with PyTorch's data type convention is required.\n";
+ ss << "If you do indeed see errors at runtime either:\n";
+ ss << "- Remove the dtype spec for " << in->debugName() << std::endl;
+ ss << "- Disable partial compilation by setting require_full_compilation to True";
auto warn_str = ss.str();
LOG_WARNING(warn_str);
// Overwrite type map with user settings
- first_use_type_map[in][i] = {util::TRTDataTypeToScalarType(cfg.convert_info.collection_input_spec_map.find(in)->second[i].dtype)};
-
- } else {
- if (util::TRTDataTypeToScalarType(cfg.convert_info.collection_input_spec_map.find(in)->second[i].dtype) != est_type_opt[i].value()) {
- std::stringstream ss;
- ss << "For input " << in->debugName() << ", found user specified input dtype as ";
- ss << cfg.convert_info.collection_input_spec_map.find(in)->second[i].dtype;
- ss << ", however when inspecting the graph, the input type expected was inferred to be ";
- ss << est_type_opt[i].value() << std::endl;
- ss << "The compiler is going to use the user setting " << cfg.convert_info.collection_input_spec_map.find(in)->second[i].dtype;
- ss << "\nThis conflict may cause an error at runtime due to partial compilation being enabled and therefore\n";
- ss << "compatibility with PyTorch's data type convention is required.\n";
- ss << "If you do indeed see errors at runtime either:\n";
- ss << "- Remove the dtype spec for " << in->debugName() << std::endl;
- ss << "- Disable partial compilation by setting require_full_compilation to True";
- auto warn_str = ss.str();
- LOG_WARNING(warn_str);
- // Overwrite type map with user settings
- first_use_type_map[in][i] = {util::TRTDataTypeToScalarType(cfg.convert_info.collection_input_spec_map.find(in)->second[i].dtype)};
- }
+ first_use_type_map[in][i] = {
+ util::TRTDataTypeToScalarType(cfg.convert_info.collection_input_spec_map.find(in)->second[i].dtype)};
}
- } else {
- // The user defined the type so no changes are necessary
}
+ } else {
+ // The user defined the type so no changes are necessary
}
}
+ }
// }
}
@@ -425,12 +433,13 @@ torch::jit::Module CompileGraph(const torch::jit::Module& mod, CompileSpec cfg)
if (cfg.partition_info.enabled &&
(!(cfg.lower_info.forced_fallback_modules.size() == 0 &&
- cfg.partition_info.forced_fallback_operators.size() == 0 && isBlockConvertible)
- || outputIsCollection)) {
-
+ cfg.partition_info.forced_fallback_operators.size() == 0 && isBlockConvertible) ||
+ outputIsCollection)) {
std::unordered_map<torch::jit::Node*, int> fallback_nodes;
- auto collection_input_ivalues_map = partitioning::generateRandomInputs(cfg.convert_info.collection_input_spec_map, first_use_types);
- auto graph_and_mapping = ConstructFallbackGraph(new_mod, g->block(), collection_input_ivalues_map, cfg, static_params, fallback_nodes);
+ auto collection_input_ivalues_map =
+ partitioning::generateRandomInputs(cfg.convert_info.collection_input_spec_map, first_use_types);
+ auto graph_and_mapping = ConstructFallbackGraph(
+ new_mod, g->block(), collection_input_ivalues_map, cfg, static_params, fallback_nodes);
new_g = graph_and_mapping.first;
// renaming the input name of graph after fallback to ensure pytorch deserialize it correctly
for (size_t i = 0; i < new_g->inputs().size(); ++i) {
diff --git a/workspace/core/conversion/conversion.cpp b/tmp/changes.txt
index 914f1dd..5f4b20e 100644
--- a/workspace/core/conversion/conversion.cpp
+++ b/tmp/changes.txt
@@ -135,12 +135,10 @@ void AddLayer(ConversionCtx* ctx, const torch::jit::Node* n) {
<< "please report this error to https://www.github.com/NVIDIA/Torch-TensorRT/issues");
}
-void AddInputs(
- ConversionCtx* ctx,
- c10::ArrayRef<const torch::jit::Value*> inputs,
- ConversionInfo& conversion_info) {
+void AddInputs(ConversionCtx* ctx, c10::ArrayRef<const torch::jit::Value*> inputs, ConversionInfo& conversion_info) {
std::unordered_map<const torch::jit::Value*, ir::Input>& input_specs = conversion_info.inputs;
- std::unordered_map<const torch::jit::Value*, std::vector<ir::Input>> collection_input_spec = conversion_info.collection_input_spec_map;
+ std::unordered_map<const torch::jit::Value*, std::vector<ir::Input>> collection_input_spec =
+ conversion_info.collection_input_spec_map;
std::vector<const torch::jit::Value*> input_tensors;
for (auto in : inputs) {
@@ -173,7 +171,7 @@ void AddInputs(
"Cannot find an input spec associated with input: " << in->debugName());
ir::Input spec;
if (input_specs.find(in) != input_specs.end()) {
- spec = input_specs.find(in)->second;
+ spec = input_specs.find(in)->second;
} else {
spec = collection_input_spec.find(in)->second[0]; // assume input is tensor
}
@@ -559,8 +557,9 @@ std::set<std::string> ConvertableOpsInBlock(const torch::jit::Block* b) {
}
bool OutputIsCollection(const torch::jit::Block* b) {
- for (auto out: b->outputs()) {
- if(out->type()->kind() == torch::jit::TypeKind::TupleType || out->type()->kind() == torch::jit::TypeKind::ListType) {
+ for (auto out : b->outputs()) {
+ if (out->type()->kind() == torch::jit::TypeKind::TupleType ||
+ out->type()->kind() == torch::jit::TypeKind::ListType) {
return true;
}
}
diff --git a/workspace/core/conversion/conversionctx/ConversionCtx.cpp b/tmp/changes.txt
index a24a159..71159eb 100644
--- a/workspace/core/conversion/conversionctx/ConversionCtx.cpp
+++ b/tmp/changes.txt
@@ -107,7 +107,7 @@ ConversionCtx::ConversionCtx(BuilderSettings build_settings)
}
cfg->setAvgTimingIterations(settings.num_avg_timing_iters);
- if (settings.workspace_size != 0){
+ if (settings.workspace_size != 0) {
cfg->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kWORKSPACE, settings.workspace_size);
}
@@ -124,13 +124,13 @@ ConversionCtx::ConversionCtx(BuilderSettings build_settings)
settings.enabled_precisions.find(nvinfer1::DataType::kFLOAT) == settings.enabled_precisions.end(),
"DLA supports only fp16 or int8 precision");
cfg->setDLACore(settings.device.dla_core);
- if (settings.dla_sram_size != 1048576){
+ if (settings.dla_sram_size != 1048576) {
cfg->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kDLA_MANAGED_SRAM, settings.dla_sram_size);
}
- if (settings.dla_local_dram_size != 1073741824){
+ if (settings.dla_local_dram_size != 1073741824) {
cfg->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kDLA_LOCAL_DRAM, settings.dla_local_dram_size);
}
- if (settings.dla_global_dram_size != 536870912){
+ if (settings.dla_global_dram_size != 536870912) {
cfg->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kDLA_GLOBAL_DRAM, settings.dla_global_dram_size);
}
}
diff --git a/workspace/core/conversion/converters/converter_util.cpp b/tmp/changes.txt
index a6a2bbd..7452615 100644
--- a/workspace/core/conversion/converters/converter_util.cpp
+++ b/tmp/changes.txt
@@ -207,13 +207,13 @@ nvinfer1::ITensor* clamp(
nvinfer1::ITensor* lower_bound,
nvinfer1::ITensor* upper_bound,
std::string const& name) {
-
auto max_layer = add_elementwise(ctx, nvinfer1::ElementWiseOperation::kMAX, x, lower_bound, "max layer for " + name);
TORCHTRT_CHECK(max_layer, "Unable to create max layer for clamp");
LOG_DEBUG(ctx->logger, "Create " << max_layer->getName() << " for clamp");
auto max_itensor = max_layer->getOutput(0);
- auto min_layer = add_elementwise(ctx, nvinfer1::ElementWiseOperation::kMIN, max_itensor, upper_bound, "min layer for " + name);
+ auto min_layer =
+ add_elementwise(ctx, nvinfer1::ElementWiseOperation::kMIN, max_itensor, upper_bound, "min layer for " + name);
TORCHTRT_CHECK(min_layer, "Unable to create min layer for clamp");
LOG_DEBUG(ctx->logger, "Create " << min_layer->getName() << " for clamp");
auto min_itensor = min_layer->getOutput(0);
@@ -227,13 +227,13 @@ nvinfer1::ITensor* clamp_to_input_dim(
nvinfer1::ITensor* input_dim,
int nbdims,
std::string const& name) {
-
auto zero = torch::zeros({nbdims}).to(torch::kI32);
auto zero_itensor = tensor_to_const(ctx, zero);
auto one = torch::ones({nbdims}).to(torch::kI32);
auto one_itensor = tensor_to_const(ctx, one);
- auto upper_bound_layer = add_elementwise(ctx, nvinfer1::ElementWiseOperation::kSUB, input_dim, one_itensor, "sub layer for " + name);
+ auto upper_bound_layer =
+ add_elementwise(ctx, nvinfer1::ElementWiseOperation::kSUB, input_dim, one_itensor, "sub layer for " + name);
TORCHTRT_CHECK(upper_bound_layer, "Unable to create sub layer for clamp to inputDim");
LOG_DEBUG(ctx->logger, "Create " << upper_bound_layer->getName() << " for clamp to inputDim");
auto upper_bound = upper_bound_layer->getOutput(0);
@@ -243,7 +243,8 @@ nvinfer1::ITensor* clamp_to_input_dim(
LOG_DEBUG(ctx->logger, "Create " << max_layer->getName() << " for clamp to inputDim");
auto max_itensor = max_layer->getOutput(0);
- auto min_layer = add_elementwise(ctx, nvinfer1::ElementWiseOperation::kMIN, max_itensor, upper_bound, "min layer for " + name);
+ auto min_layer =
+ add_elementwise(ctx, nvinfer1::ElementWiseOperation::kMIN, max_itensor, upper_bound, "min layer for " + name);
TORCHTRT_CHECK(min_layer, "Unable to create min_layer for clamp to inputDim");
LOG_DEBUG(ctx->logger, "Create " << min_layer->getName() << " for clamp to inputDim");
auto min_itensor = min_layer->getOutput(0);
@@ -257,7 +258,6 @@ nvinfer1::ITensor* normalize_indices(
nvinfer1::ITensor* indices,
int nbdims,
std::string const& name) {
-
auto zero = torch::zeros({nbdims}).to(torch::kI32);
auto neg = -torch::ones({nbdims}).to(torch::kI32);
auto zero_itensor = tensor_to_const(ctx, zero);
@@ -307,17 +307,20 @@ nvinfer1::ITensor* get_slice_size(
at::Tensor one_tensor = torch::ones({nbdims}).to(torch::kI32);
auto one_itensor = tensor_to_const(ctx, one_tensor);
- auto sub_layer = add_elementwise(ctx, nvinfer1::ElementWiseOperation::kSUB, end, start, "get_slice_size sub layer for " + name);
+ auto sub_layer =
+ add_elementwise(ctx, nvinfer1::ElementWiseOperation::kSUB, end, start, "get_slice_size sub layer for " + name);
TORCHTRT_CHECK(sub_layer, "Unable to create sub layer in calculate_output_size");
LOG_DEBUG(ctx->logger, "Create " << sub_layer->getName() << " for calculate_output_size");
auto sub_itensor = sub_layer->getOutput(0);
- auto div_layer = add_elementwise(ctx, nvinfer1::ElementWiseOperation::kDIV, sub_itensor, stride, "get_slice_size div layer for " + name);
+ auto div_layer = add_elementwise(
+ ctx, nvinfer1::ElementWiseOperation::kDIV, sub_itensor, stride, "get_slice_size div layer for " + name);
TORCHTRT_CHECK(div_layer, "Unable to create div layer in calculate_output_size");
LOG_DEBUG(ctx->logger, "Create " << div_layer->getName() << " for calculate_output_size");
auto div_itensor = div_layer->getOutput(0);
- auto add_layer = add_elementwise(ctx, nvinfer1::ElementWiseOperation::kSUM, div_itensor, one_itensor, "get_slice_size sum layer for " + name);
+ auto add_layer = add_elementwise(
+ ctx, nvinfer1::ElementWiseOperation::kSUM, div_itensor, one_itensor, "get_slice_size sum layer for " + name);
TORCHTRT_CHECK(add_layer, "Unable to create add layer in calculate_output_size");
LOG_DEBUG(ctx->logger, "Create " << add_layer->getName() << " for calculate_output_size");
auto size_itensor = add_layer->getOutput(0);
diff --git a/workspace/core/conversion/converters/impl/select.cpp b/tmp/changes.txt
index 3599ab9..d33f09a 100644
--- a/workspace/core/conversion/converters/impl/select.cpp
+++ b/tmp/changes.txt
@@ -103,121 +103,118 @@ nvinfer1::ITensor* roll(
auto select_registrations TORCHTRT_UNUSED =
RegisterNodeConversionPatterns()
- .pattern(
- {"aten::select.int(Tensor(a) self, int dim, int index) -> (Tensor(a))",
- [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
- auto in = args[0].ITensorOrFreeze(ctx);
- auto maxDim = static_cast<int64_t>(in->getDimensions().nbDims);
- auto dim = args[1].unwrapToInt();
- // Handle negative axis by refering to nbDims of input Tensor
- dim = dim < 0 ? dim + maxDim : dim;
- auto ind = (int32_t)args[2].unwrapToInt();
- // Along the specified dimension, handle negative index by subtracting along length of dimension.
- ind = ind < 0 ? ind + in->getDimensions().d[dim] : ind;
- LOG_DEBUG("Gather input dimensions: " << in->getDimensions());
- LOG_DEBUG("Dimension to select: " << dim);
- LOG_DEBUG("Index: " << ind);
-
- // index to access needs to be an at::Tensor
- at::Tensor indices = torch::tensor({ind}).to(torch::kI32);
- auto const_out = tensor_to_const(ctx, indices);
-
- // IGatherLayer takes in input tensor, the indices, and the axis
- // of input tensor to take indices from
- auto gather_layer = ctx->net->addGather(*in, *const_out, dim);
- TORCHTRT_CHECK(gather_layer, "Unable to create gather layer from node: " << *n);
- auto out = gather_layer->getOutput(0);
+ .pattern({"aten::select.int(Tensor(a) self, int dim, int index) -> (Tensor(a))",
+ [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
+ auto in = args[0].ITensorOrFreeze(ctx);
+ auto maxDim = static_cast<int64_t>(in->getDimensions().nbDims);
+ auto dim = args[1].unwrapToInt();
+ // Handle negative axis by refering to nbDims of input Tensor
+ dim = dim < 0 ? dim + maxDim : dim;
+ auto ind = (int32_t)args[2].unwrapToInt();
+ // Along the specified dimension, handle negative index by subtracting along length of dimension.
+ ind = ind < 0 ? ind + in->getDimensions().d[dim] : ind;
+ LOG_DEBUG("Gather input dimensions: " << in->getDimensions());
+ LOG_DEBUG("Dimension to select: " << dim);
+ LOG_DEBUG("Index: " << ind);
+
+ // index to access needs to be an at::Tensor
+ at::Tensor indices = torch::tensor({ind}).to(torch::kI32);
+ auto const_out = tensor_to_const(ctx, indices);
+
+ // IGatherLayer takes in input tensor, the indices, and the axis
+ // of input tensor to take indices from
+ auto gather_layer = ctx->net->addGather(*in, *const_out, dim);
+ TORCHTRT_CHECK(gather_layer, "Unable to create gather layer from node: " << *n);
+ auto out = gather_layer->getOutput(0);
+
+ LOG_DEBUG("Gather tensor shape: " << out->getDimensions());
+
+ if (out->getDimensions().nbDims != 1) {
- LOG_DEBUG("Gather tensor shape: " << out->getDimensions());
- if (out->getDimensions().nbDims != 1) {
- // IShuffleLayer removes redundant dimensions
- auto shuffle_layer = ctx->net->addShuffle(*out);
- TORCHTRT_CHECK(shuffle_layer, "Unable to create shuffle layer from node: " << *n);
- shuffle_layer->setReshapeDimensions(util::squeezeDims(out->getDimensions(), dim));
- shuffle_layer->setName(util::node_info(n).c_str());
- out = shuffle_layer->getOutput(0);
- }
- out = ctx->AssociateValueAndTensor(n->outputs()[0], out);
- LOG_DEBUG("Output tensor shape: " << out->getDimensions());
- return true;
- }})
- .pattern(
- {"aten::narrow(Tensor(a) self, int dim, int start, int length) -> Tensor(a)",
- [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
- auto in = args[0].ITensor();
- auto axis = args[1].unwrapToInt();
- auto start = (int32_t)args[2].unwrapToInt();
- auto length = (int32_t)args[3].unwrapToInt();
-
- // index to access needs to be an at::Tensor
- at::Tensor indices = torch::arange(start, start + length, 1).to(torch::kI32);
- auto weights = Weights(ctx, indices);
-
- // IConstantLayer to convert indices from Weights to ITensor
- auto const_layer = ctx->net->addConstant(weights.shape, weights.data);
- TORCHTRT_CHECK(const_layer, "Unable to create constant layer from node: " << *n);
- auto const_out = const_layer->getOutput(0);
-
- // IGatherLayer takes in input tensor, the indices, and the axis
- // of input tensor to take indices from
- auto gather_layer = ctx->net->addGather(*in, *const_out, axis);
- TORCHTRT_CHECK(gather_layer, "Unable to create gather layer from node: " << *n);
- auto gather_out = gather_layer->getOutput(0);
-
- // IShuffleLayer removes redundant dimensions
- auto shuffle_layer = ctx->net->addShuffle(*gather_out);
- TORCHTRT_CHECK(shuffle_layer, "Unable to create shuffle layer from node: " << *n);
- shuffle_layer->setReshapeDimensions(util::unpadDims(gather_out->getDimensions()));
- shuffle_layer->setName(util::node_info(n).c_str());
- auto shuffle_out = shuffle_layer->getOutput(0);
- auto out = ctx->AssociateValueAndTensor(n->outputs()[0], shuffle_out);
- LOG_DEBUG("Output tensor shape: " << out->getDimensions());
- return true;
- }})
- .pattern(
- {"aten::narrow.Tensor(Tensor(a) self, int dim, Tensor start, int length) -> Tensor(a)",
- [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
- auto in = args[0].ITensor();
- auto axis = args[1].unwrapToInt();
- torch::Tensor start = args[2].IValue()->toTensor().to(torch::kI32);
- int32_t startIdx = start.item().to<int32_t>();
- auto length = (int32_t)args[3].unwrapToInt();
-
- // index to access needs to be an at::Tensor
- at::Tensor indices = torch::arange(startIdx, startIdx + length, 1).to(torch::kI32);
- auto weights = Weights(ctx, indices);
-
- // IConstantLayer to convert indices from Weights to ITensor
- auto const_layer = ctx->net->addConstant(weights.shape, weights.data);
- TORCHTRT_CHECK(const_layer, "Unable to create constant layer from node: " << *n);
- auto const_out = const_layer->getOutput(0);
-
- // IGatherLayer takes in input tensor, the indices, and the axis
- // of input tensor to take indices from
- auto gather_layer = ctx->net->addGather(*in, *const_out, axis);
- TORCHTRT_CHECK(gather_layer, "Unable to create gather layer from node: " << *n);
- auto gather_out = gather_layer->getOutput(0);
-
- // IShuffleLayer removes redundant dimensions
- auto shuffle_layer = ctx->net->addShuffle(*gather_out);
- TORCHTRT_CHECK(shuffle_layer, "Unable to create shuffle layer from node: " << *n);
- shuffle_layer->setReshapeDimensions(util::unpadDims(gather_out->getDimensions()));
- shuffle_layer->setName(util::node_info(n).c_str());
- auto shuffle_out = shuffle_layer->getOutput(0);
-
- auto out = ctx->AssociateValueAndTensor(n->outputs()[0], shuffle_out);
-
- LOG_DEBUG("Output tensor shape: " << out->getDimensions());
- return true;
- }})
+ // IShuffleLayer removes redundant dimensions
+ auto shuffle_layer = ctx->net->addShuffle(*out);
+ TORCHTRT_CHECK(shuffle_layer, "Unable to create shuffle layer from node: " << *n);
+ shuffle_layer->setReshapeDimensions(util::squeezeDims(out->getDimensions(), dim));
+ shuffle_layer->setName(util::node_info(n).c_str());
+ out = shuffle_layer->getOutput(0);
+ }
+
+ out = ctx->AssociateValueAndTensor(n->outputs()[0], out);
+
+ LOG_DEBUG("Output tensor shape: " << out->getDimensions());
+ return true;
+ }})
+ .pattern({"aten::narrow(Tensor(a) self, int dim, int start, int length) -> Tensor(a)",
+ [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
+ auto in = args[0].ITensor();
+ auto axis = args[1].unwrapToInt();
+ auto start = (int32_t)args[2].unwrapToInt();
+ auto length = (int32_t)args[3].unwrapToInt();
+ // index to access needs to be an at::Tensor
+ at::Tensor indices = torch::arange(start, start + length, 1).to(torch::kI32);
+ auto weights = Weights(ctx, indices);
+ // IConstantLayer to convert indices from Weights to ITensor
+ auto const_layer = ctx->net->addConstant(weights.shape, weights.data);
+ TORCHTRT_CHECK(const_layer, "Unable to create constant layer from node: " << *n);
+ auto const_out = const_layer->getOutput(0);
+ // IGatherLayer takes in input tensor, the indices, and the axis
+ // of input tensor to take indices from
+ auto gather_layer = ctx->net->addGather(*in, *const_out, axis);
+ TORCHTRT_CHECK(gather_layer, "Unable to create gather layer from node: " << *n);
+ auto gather_out = gather_layer->getOutput(0);
+ // IShuffleLayer removes redundant dimensions
+ auto shuffle_layer = ctx->net->addShuffle(*gather_out);
+ TORCHTRT_CHECK(shuffle_layer, "Unable to create shuffle layer from node: " << *n);
+ shuffle_layer->setReshapeDimensions(util::unpadDims(gather_out->getDimensions()));
+ shuffle_layer->setName(util::node_info(n).c_str());
+ auto shuffle_out = shuffle_layer->getOutput(0);
+ auto out = ctx->AssociateValueAndTensor(n->outputs()[0], shuffle_out);
+ LOG_DEBUG("Output tensor shape: " << out->getDimensions());
+ return true;
+ }})
+ .pattern({"aten::narrow.Tensor(Tensor(a) self, int dim, Tensor start, int length) -> Tensor(a)",
+ [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
+ auto in = args[0].ITensor();
+ auto axis = args[1].unwrapToInt();
+ torch::Tensor start = args[2].IValue()->toTensor().to(torch::kI32);
+ int32_t startIdx = start.item().to<int32_t>();
+ auto length = (int32_t)args[3].unwrapToInt();
+
+ // index to access needs to be an at::Tensor
+ at::Tensor indices = torch::arange(startIdx, startIdx + length, 1).to(torch::kI32);
+ auto weights = Weights(ctx, indices);
+
+ // IConstantLayer to convert indices from Weights to ITensor
+ auto const_layer = ctx->net->addConstant(weights.shape, weights.data);
+ TORCHTRT_CHECK(const_layer, "Unable to create constant layer from node: " << *n);
+ auto const_out = const_layer->getOutput(0);
+
+ // IGatherLayer takes in input tensor, the indices, and the axis
+ // of input tensor to take indices from
+ auto gather_layer = ctx->net->addGather(*in, *const_out, axis);
+ TORCHTRT_CHECK(gather_layer, "Unable to create gather layer from node: " << *n);
+ auto gather_out = gather_layer->getOutput(0);
+
+ // IShuffleLayer removes redundant dimensions
+ auto shuffle_layer = ctx->net->addShuffle(*gather_out);
+ TORCHTRT_CHECK(shuffle_layer, "Unable to create shuffle layer from node: " << *n);
+ shuffle_layer->setReshapeDimensions(util::unpadDims(gather_out->getDimensions()));
+ shuffle_layer->setName(util::node_info(n).c_str());
+ auto shuffle_out = shuffle_layer->getOutput(0);
+
+ auto out = ctx->AssociateValueAndTensor(n->outputs()[0], shuffle_out);
+
+ LOG_DEBUG("Output tensor shape: " << out->getDimensions());
+ return true;
+ }})
.pattern(
{"aten::embedding(Tensor weight, Tensor indices, int padding_idx=-1, bool scale_grad_by_freq=False, bool sparse=False) -> (Tensor)",
[](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
@@ -239,30 +236,29 @@ auto select_registrations TORCHTRT_UNUSED =
return true;
}})
- .pattern(
- {"aten::roll(Tensor self, int[1] shifts, int[1] dims=[]) -> (Tensor)",
- [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
- auto in = args[0].ITensor();
- auto shifts = args[1].unwrapToIntList().vec();
- auto dims = args[2].unwrapToIntList().vec();
-
- TORCHTRT_CHECK(dims.size() == shifts.size(), "dims.size() should be equal to shifts.size()");
- if (ctx->input_is_dynamic) {
- TORCHTRT_THROW_ERROR("aten::roll is currently not support in dynamic input shape compilation");
- } else {
- auto in_shape = util::toVec(in->getDimensions());
- for (size_t i = 0; i < dims.size(); i++) {
- auto dim = dims[i] < 0 ? (in_shape.size() + dims[i]) : dims[i];
- TORCHTRT_CHECK(dim < in_shape.size(), "Dimension out of range");
- in = roll(ctx, in, shifts[i], dim, in_shape);
- }
- auto out = ctx->AssociateValueAndTensor(n->outputs()[0], in);
-
- LOG_DEBUG("Output tensor shape: " << out->getDimensions());
-
- return true;
- }
- }})
+ .pattern({"aten::roll(Tensor self, int[1] shifts, int[1] dims=[]) -> (Tensor)",
+ [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
+ auto in = args[0].ITensor();
+ auto shifts = args[1].unwrapToIntList().vec();
+ auto dims = args[2].unwrapToIntList().vec();
+
+ TORCHTRT_CHECK(dims.size() == shifts.size(), "dims.size() should be equal to shifts.size()");
+ if (ctx->input_is_dynamic) {
+ TORCHTRT_THROW_ERROR("aten::roll is currently not support in dynamic input shape compilation");
+ } else {
+ auto in_shape = util::toVec(in->getDimensions());
+ for (size_t i = 0; i < dims.size(); i++) {
+ auto dim = dims[i] < 0 ? (in_shape.size() + dims[i]) : dims[i];
+ TORCHTRT_CHECK(dim < in_shape.size(), "Dimension out of range");
+ in = roll(ctx, in, shifts[i], dim, in_shape);
+ }
+ auto out = ctx->AssociateValueAndTensor(n->outputs()[0], in);
+
+ LOG_DEBUG("Output tensor shape: " << out->getDimensions());
+
+ return true;
+ }
+ }})
.pattern(
{"aten::index.Tensor(Tensor self, Tensor?[] indices) -> (Tensor)",
[](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
@@ -319,7 +315,8 @@ auto select_registrations TORCHTRT_UNUSED =
int startIdx = 0;
auto startIdxIVal = args[2].IValue();
if (!startIdxIVal->isNone()) {
- startIdx = startIdxIVal->toInt() > std::numeric_limits<int32_t>::max() ? maxDim : startIdxIVal->toInt();
+ startIdx =
+ startIdxIVal->toInt() > std::numeric_limits<int32_t>::max() ? maxDim : startIdxIVal->toInt();
startIdx = maxDim == -1 ? startIdx : std::min(startIdx, maxDim);
}
// Handle case when given tensor index is negative
@@ -331,7 +328,8 @@ auto select_registrations TORCHTRT_UNUSED =
int endIdx = maxDim; // -1 for dynamic shape
auto endIdxIVal = args[3].IValue();
if (!endIdxIVal->isNone()) {
- int truncate_value = endIdxIVal->toInt() > std::numeric_limits<int32_t>::max() ? maxDim : endIdxIVal->toInt();
+ int truncate_value =
+ endIdxIVal->toInt() > std::numeric_limits<int32_t>::max() ? maxDim : endIdxIVal->toInt();
endIdx = maxDim == -1 ? truncate_value : std::min(truncate_value, maxDim);
}
if (maxDim > 0) {
@@ -385,7 +383,8 @@ auto select_registrations TORCHTRT_UNUSED =
// update start and end
nvinfer1::ITensor* out_start;
nvinfer1::ITensor* out_end;
- auto start_end = normalize_start_and_end(ctx, ishape_tensor, start_itensor, end_itensor, nbdims, node_name);
+ auto start_end =
+ normalize_start_and_end(ctx, ishape_tensor, start_itensor, end_itensor, nbdims, node_name);
out_start = start_end[0];
out_end = start_end[1];
@@ -397,7 +396,7 @@ auto select_registrations TORCHTRT_UNUSED =
slice_layer->setInput(2, *size_itensor); // size, must be set if input is dynamic
}
auto slice_out = slice_layer->getOutput(0);
-
+
auto out = ctx->AssociateValueAndTensor(n->outputs()[0], slice_out);
LOG_DEBUG("Slice layer output shape: " << out->getDimensions());
diff --git a/workspace/core/partitioning/shape_analysis.cpp b/tmp/changes.txt
index 1221318..8767048 100644
--- a/workspace/core/partitioning/shape_analysis.cpp
+++ b/tmp/changes.txt
@@ -9,31 +9,28 @@ namespace core {
namespace partitioning {
at::Tensor generateSingleInput(ir::Input& input, c10::optional<at::ScalarType>& type_opt) {
- auto cur_shape = input.input_shape;
- std::vector<int64_t> shape;
- shape.insert(shape.begin(), std::begin(cur_shape.d), std::begin(cur_shape.d) + cur_shape.nbDims);
- // auto type_opt = types[input.first][i];
- auto type = at::kFloat;
- if (type_opt) {
- type = type_opt.value();
- } else {
- LOG_WARNING("Input type for doing shape analysis could not be determined, defaulting to F32");
- }
- auto in = at::randint(5, shape, {at::kCUDA}).to(type);
- // ivalue_map[input.first] = in.clone();
- return in;
+ auto cur_shape = input.input_shape;
+ std::vector<int64_t> shape;
+ shape.insert(shape.begin(), std::begin(cur_shape.d), std::begin(cur_shape.d) + cur_shape.nbDims);
+ // auto type_opt = types[input.first][i];
+ auto type = at::kFloat;
+ if (type_opt) {
+ type = type_opt.value();
+ } else {
+ LOG_WARNING("Input type for doing shape analysis could not be determined, defaulting to F32");
+ }
+ auto in = at::randint(5, shape, {at::kCUDA}).to(type);
+ // ivalue_map[input.first] = in.clone();
+ return in;
}
std::unordered_map<const torch::jit::Value*, torch::jit::IValue> generateRandomInputs(
std::unordered_map<const torch::jit::Value*, std::vector<ir::Input>>& inputs,
std::unordered_map<const torch::jit::Value*, std::vector<c10::optional<at::ScalarType>>>& types) {
-
// generate random inputs for running pytorch segments
std::unordered_map<const torch::jit::Value*, torch::jit::IValue> ivalue_map;
-
for (auto& input : inputs) {
-
if (input.first->type()->kind() == torch::jit::TypeKind::ListType) {
// create list
std::vector<torch::jit::IValue> list;
@@ -56,7 +53,6 @@ std::unordered_map<const torch::jit::Value*, torch::jit::IValue> generateRandomI
} else {
auto in = generateSingleInput(input.second[0], types[input.first][0]);
ivalue_map[input.first] = in.clone();
-
}
}
return ivalue_map;
@@ -109,7 +105,8 @@ void getSegmentsOutputByRunning(
jit_inputs_ivalues.push_back(ivalues_maps[input].toBool());
} else if (input->type()->kind() == torch::jit::TypeKind::ListType) {
// create list
- jit_inputs_ivalues.push_back(ivalues_maps[input].toList());;
+ jit_inputs_ivalues.push_back(ivalues_maps[input].toList());
+ ;
} else if (input->type()->kind() == torch::jit::TypeKind::TupleType) {
// create tuple
jit_inputs_ivalues.push_back(ivalues_maps[input].toTuple());
diff --git a/workspace/core/ir/GraphInputs.cpp b/tmp/changes.txt
index 007a727..a1b1196 100644
--- a/workspace/core/ir/GraphInputs.cpp
+++ b/tmp/changes.txt
@@ -5,70 +5,74 @@ namespace torch_tensorrt {
namespace core {
namespace ir {
-void flatten_dfs(std::vector<torch_tensorrt::core::ir::Input>& flattened_inputs, std::vector<std::vector<torch_tensorrt::core::ir::Input>>& collection_inputs,
- torch::jit::IValue input_ivalue, int level, int index) {
- if (input_ivalue.isTuple()) {
- auto input_tuple = input_ivalue.toTuple();
- int idx = 0;
- if (level == 0) {
- collection_inputs.resize(input_tuple->elements().size());
- }
- for (auto item: input_tuple->elements()) {
- torch::jit::IValue converted_item;
- int cur_idx = level < 1 ? idx: index;
- flatten_dfs(flattened_inputs, collection_inputs, item, level+1, cur_idx);
- idx++;
- }
- } else if(input_ivalue.isList()) {
- auto input_list = input_ivalue.toList().vec();
- if (level == 0) {
- collection_inputs.resize(input_list.size());
- }
- c10::TypePtr type = input_list[0].type();
- auto converted_elements = c10::impl::GenericList(type);
- int idx = 0;
- for (auto item: input_list) {
- int cur_idx = level < 1 ? idx: index;
- flatten_dfs(flattened_inputs, collection_inputs, item, level+1, cur_idx);
- idx++;
- }
- } else if(input_ivalue.isCustomClass()) {
- torch_tensorrt::core::ir::Input cur_input = *(input_ivalue.toCustomClass<torch_tensorrt::core::ir::Input>());
- flattened_inputs.push_back(cur_input);
- if (level == 0) { // a single value like A
- collection_inputs.resize(1);
- collection_inputs[0].push_back(cur_input);
- } else if (level == 1) { // like A in [A, A] or [(B, B), A]
- collection_inputs[index].push_back(cur_input);
- } else if (level == 2) { // like A in [(A, A), C]
- collection_inputs[index].push_back(cur_input);
- } else {// only support 2 level
- LOG_ERROR("Input nesting depth exceeds currently supported depth (3), use 1 level: [A, B], or 2 level: [A, (B, C)]");
- }
+void flatten_dfs(
+ std::vector<torch_tensorrt::core::ir::Input>& flattened_inputs,
+ std::vector<std::vector<torch_tensorrt::core::ir::Input>>& collection_inputs,
+ torch::jit::IValue input_ivalue,
+ int level,
+ int index) {
+ if (input_ivalue.isTuple()) {
+ auto input_tuple = input_ivalue.toTuple();
+ int idx = 0;
+ if (level == 0) {
+ collection_inputs.resize(input_tuple->elements().size());
}
+ for (auto item : input_tuple->elements()) {
+ torch::jit::IValue converted_item;
+ int cur_idx = level < 1 ? idx : index;
+ flatten_dfs(flattened_inputs, collection_inputs, item, level + 1, cur_idx);
+ idx++;
+ }
+ } else if (input_ivalue.isList()) {
+ auto input_list = input_ivalue.toList().vec();
+ if (level == 0) {
+ collection_inputs.resize(input_list.size());
+ }
+ c10::TypePtr type = input_list[0].type();
+ auto converted_elements = c10::impl::GenericList(type);
+ int idx = 0;
+ for (auto item : input_list) {
+ int cur_idx = level < 1 ? idx : index;
+ flatten_dfs(flattened_inputs, collection_inputs, item, level + 1, cur_idx);
+ idx++;
+ }
+ } else if (input_ivalue.isCustomClass()) {
+ torch_tensorrt::core::ir::Input cur_input = *(input_ivalue.toCustomClass<torch_tensorrt::core::ir::Input>());
+ flattened_inputs.push_back(cur_input);
+ if (level == 0) { // a single value like A
+ collection_inputs.resize(1);
+ collection_inputs[0].push_back(cur_input);
+ } else if (level == 1) { // like A in [A, A] or [(B, B), A]
+ collection_inputs[index].push_back(cur_input);
+ } else if (level == 2) { // like A in [(A, A), C]
+ collection_inputs[index].push_back(cur_input);
+ } else { // only support 2 level
+ LOG_ERROR(
+ "Input nesting depth exceeds currently supported depth (3), use 1 level: [A, B], or 2 level: [A, (B, C)]");
+ }
+ }
}
-
GraphInputs::GraphInputs(std::vector<ir::Input> inputs_) {
- LOG_DEBUG("Construct GraphInput with ir::Input");
- inputs = inputs_;
- collection_inputs.resize(inputs_.size());
- for (size_t i = 0; i < inputs_.size(); i++) {
- collection_inputs[i].push_back(inputs_[i]);
- }
+ LOG_DEBUG("Construct GraphInput with ir::Input");
+ inputs = inputs_;
+ collection_inputs.resize(inputs_.size());
+ for (size_t i = 0; i < inputs_.size(); i++) {
+ collection_inputs[i].push_back(inputs_[i]);
+ }
}
GraphInputs::GraphInputs(torch::jit::IValue& input_signature_) {
- LOG_DEBUG("Construct GraphInput with IValue");
+ LOG_DEBUG("Construct GraphInput with IValue");
- std::vector<torch_tensorrt::core::ir::Input> flattened_inputs;
- std::vector<std::vector<torch_tensorrt::core::ir::Input>> collection_inputs_;
+ std::vector<torch_tensorrt::core::ir::Input> flattened_inputs;
+ std::vector<std::vector<torch_tensorrt::core::ir::Input>> collection_inputs_;
- flatten_dfs(flattened_inputs, collection_inputs_, input_signature_, 0, 0);
- inputs = flattened_inputs;
- input_signature = input_signature_;
- collection_inputs = collection_inputs_;
- LOG_DEBUG("Collection Input Size: " << collection_inputs_.size());
+ flatten_dfs(flattened_inputs, collection_inputs_, input_signature_, 0, 0);
+ inputs = flattened_inputs;
+ input_signature = input_signature_;
+ collection_inputs = collection_inputs_;
+ LOG_DEBUG("Collection Input Size: " << collection_inputs_.size());
}
} // namespace ir
diff --git a/workspace/core/ir/StaticParams.cpp b/tmp/changes.txt
index 0073ad2..8502c80 100644
--- a/workspace/core/ir/StaticParams.cpp
+++ b/tmp/changes.txt
@@ -12,8 +12,7 @@ StaticParams get_static_params(c10::ArrayRef<torch::jit::Value*> inputs, std::ve
auto param_it = params.begin();
for (auto in : inputs) {
// handle TensorType, TupleType and ListType
- if (in->type() != c10::TensorType::get() &&
- in->type()->kind() != torch::jit::TypeKind::TupleType &&
+ if (in->type() != c10::TensorType::get() && in->type()->kind() != torch::jit::TypeKind::TupleType &&
in->type()->kind() != torch::jit::TypeKind::ListType && param_it != params.end()) {
static_params[in] = *param_it;
++param_it;
diff --git a/workspace/core/ir/ir.cpp b/tmp/changes.txt
index cc82fe0..d9b021e 100644
--- a/workspace/core/ir/ir.cpp
+++ b/tmp/changes.txt
@@ -35,7 +35,9 @@ InputSpecMap pair_input_vals_with_specs(std::vector<const torch::jit::Value*> va
return a;
}
-CollectionInputSpecMap pair_input_vals_with_specs_collection(std::vector<const torch::jit::Value*> vals, std::vector<std::vector<Input>>& specs) {
+CollectionInputSpecMap pair_input_vals_with_specs_collection(
+ std::vector<const torch::jit::Value*> vals,
+ std::vector<std::vector<Input>>& specs) {
TORCHTRT_CHECK(
vals.size() == specs.size(),
"Expected dimension specifications for all input tensors"
@@ -64,7 +66,7 @@ std::vector<const torch::jit::Value*> get_tensor_inputs(
// input.1:Tensor -> used
if (in->type()->isSubtypeOf(c10::TensorType::get()) && static_params.find(in) == static_params.end()) {
input_tensors.push_back(in);
- }
+ }
}
return input_tensors;
}
@@ -80,7 +82,8 @@ std::vector<const torch::jit::Value*> get_collection_inputs(
if (in->type()->isSubtypeOf(c10::TensorType::get()) && static_params.find(in) == static_params.end()) {
input_tensors.push_back(in);
} else if (in->type()->kind() == torch::jit::TypeKind::TupleType && static_params.find(in) == static_params.end()) {
- // } else if (in->type()->isSubtypeOf(c10::TupleType::create()) && static_params.find(in) == static_params.end()) {
+ // } else if (in->type()->isSubtypeOf(c10::TupleType::create()) && static_params.find(in) == static_params.end())
+ // {
input_tensors.push_back(in); // push original tuple
at::ArrayRef<torch::jit::Value*> unpack_tuple = torch::jit::createTupleUnpack(in);
LOG_DEBUG("get_collection_inputs, tuple size " << unpack_tuple.size());
@@ -190,15 +193,15 @@ TypeMap get_block_first_calc_dtypes_opt(torch::jit::Block* b) {
if (i->type() == c10::TensorType::get()) {
torch::jit::Value* in = i;
types.insert({in, get_value_first_calc_dtype_opt(b, i)});
- } else if(i->type()->cast<c10::TupleType>()) {
+ } else if (i->type()->cast<c10::TupleType>()) {
// make sure very time get the same ptr
at::ArrayRef<torch::jit::Value*> unpack_tuple = torch::jit::createTupleUnpack(i);
LOG_DEBUG("Tuple size " << unpack_tuple.size());
- for (auto item: unpack_tuple) {
+ for (auto item : unpack_tuple) {
torch::jit::Value* in = item;
types.insert({in, get_value_first_calc_dtype_opt(b, i)});
}
- } else if(i->type()->isSubtypeOf(c10::ListType::ofTensors())) {
+ } else if (i->type()->isSubtypeOf(c10::ListType::ofTensors())) {
LOG_INFO("Unsupported type of c10::ListType::ofTensors()");
}
}
@@ -212,7 +215,7 @@ CollectionTypeMap get_block_first_calc_dtypes_opt_collection(torch::jit::Block*
torch::jit::Value* in = i;
types.insert({in, {get_value_first_calc_dtype_opt(b, i)}});
- } else if(i->type()->kind() == torch::jit::TypeKind::TupleType) {
+ } else if (i->type()->kind() == torch::jit::TypeKind::TupleType) {
// TODO: to evaluate the data type of tuple element
// make sure very time get the same ptr
// c10::optional<at::ScalarType> tp = get_value_first_calc_dtype_opt(b, i);
@@ -220,9 +223,9 @@ CollectionTypeMap get_block_first_calc_dtypes_opt_collection(torch::jit::Block*
// TODO: calculate the tuple element type, currently we use {} as default datatype
// std::vector<c10::optional<at::ScalarType>> dytpes(unpack_tuple.size(), tp);
std::vector<c10::optional<at::ScalarType>> dytpes(unpack_tuple.size());
- types.insert({i, dytpes}); // insert an empty
+ types.insert({i, dytpes}); // insert an empty
- } else if(i->type()->kind() == torch::jit::TypeKind::ListType) {
+ } else if (i->type()->kind() == torch::jit::TypeKind::ListType) {
// TODO: to decide the size of list and type of list element
LOG_DEBUG("get_block_first_calc_dtypes_opt ListType: use size " << i->uses().size());
c10::optional<at::ScalarType> tp = get_value_first_calc_dtype_opt(b, i);
@@ -234,8 +237,7 @@ CollectionTypeMap get_block_first_calc_dtypes_opt_collection(torch::jit::Block*
return types;
}
-static auto core_input_container =
- torch::class_<Input>("_torch_tensorrt_core_ir", "Input").def(torch::init<>());
+static auto core_input_container = torch::class_<Input>("_torch_tensorrt_core_ir", "Input").def(torch::init<>());
} // namespace ir
} // namespace core
diff --git a/workspace/core/conversion/converters/converter_util.h b/tmp/changes.txt
index cdf2ee5..b155499 100644
--- a/workspace/core/conversion/converters/converter_util.h
+++ b/tmp/changes.txt
@@ -1,8 +1,8 @@
#pragma once
+#include <limits>
#include <map>
#include <string>
-#include <limits>
#include "core/conversion/conversionctx/ConversionCtx.h"
#include "core/conversion/converters/Weights.h"
diff --git a/workspace/core/partitioning/shape_analysis.h b/tmp/changes.txt
index 2654699..e9c51fc 100644
--- a/workspace/core/partitioning/shape_analysis.h
+++ b/tmp/changes.txt
@@ -6,7 +6,6 @@ namespace torch_tensorrt {
namespace core {
namespace partitioning {
-
std::unordered_map<const torch::jit::Value*, torch::jit::IValue> generateRandomInputs(
std::unordered_map<const torch::jit::Value*, std::vector<ir::Input>>& input_ranges,
std::unordered_map<const torch::jit::Value*, std::vector<c10::optional<at::ScalarType>>>& input_types);
diff --git a/workspace/core/ir/ir.h b/tmp/changes.txt
index 966c747..a5225da 100644
--- a/workspace/core/ir/ir.h
+++ b/tmp/changes.txt
@@ -12,7 +12,7 @@ namespace core {
namespace ir {
struct Input : torch::CustomClassHolder {
- Input() {};
+ Input(){};
Input(
std::vector<int64_t> shape,
nvinfer1::DataType dtype = nvinfer1::DataType::kFLOAT,
@@ -42,8 +42,8 @@ struct Input : torch::CustomClassHolder {
struct GraphInputs {
GraphInputs(std::vector<ir::Input> inputs);
GraphInputs(torch::jit::IValue& input_signature);
- torch::jit::IValue input_signature; // nested Input, full input spec
- std::vector<Input> inputs; // flattend Input
+ torch::jit::IValue input_signature; // nested Input, full input spec
+ std::vector<Input> inputs; // flattend Input
std::vector<std::vector<Input>> collection_inputs; // only support two layer nesting, e.g. ((a, b), [c, d], e)
};
@@ -67,7 +67,9 @@ CollectionInputSpecMap associate_specs_with_collection_inputs(
ir::GraphInputs graph_inputs,
StaticParams& static_params);
InputSpecMap pair_input_vals_with_specs(std::vector<const torch::jit::Value*> vals, std::vector<Input> specs);
-CollectionInputSpecMap pair_input_vals_with_specs_collection(std::vector<const torch::jit::Value*> vals, std::vector<std::vector<Input>>& specs);
+CollectionInputSpecMap pair_input_vals_with_specs_collection(
+ std::vector<const torch::jit::Value*> vals,
+ std::vector<std::vector<Input>>& specs);
std::vector<const torch::jit::Value*> get_tensor_inputs(
std::shared_ptr<torch::jit::Graph>& g,
StaticParams& static_params);
diff --git a/workspace/tests/core/conversion/converters/test_cast.cpp b/tmp/changes.txt
index 092cdb3..d26c7a0 100644
--- a/workspace/tests/core/conversion/converters/test_cast.cpp
+++ b/tmp/changes.txt
@@ -135,7 +135,6 @@ TEST(Converters, ATenBoolToINT32TensorConvertsCorrectly) {
ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt, 2e-6));
}
-
TEST(Converters, ATenToSingleConvertsCorrectly) {
const auto graph = R"IR(
graph(%y.1 : Tensor):
@@ -164,7 +163,6 @@ TEST(Converters, ATenToSingleConvertsCorrectly) {
ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt, 2e-6));
}
-
TEST(Converters, ATenTypeAsConvertsCorrectly) {
const auto graph = R"IR(
graph(%0 : Tensor,
diff --git a/workspace/tests/cpp/test_example_tensors.cpp b/tmp/changes.txt
index 3ec8831..256e6f1 100644
--- a/workspace/tests/cpp/test_example_tensors.cpp
+++ b/tmp/changes.txt
@@ -9,7 +9,6 @@ TEST_P(CppAPITests, InputsFromTensors) {
trt_inputs_ivalues.push_back(in.clone());
}
-
auto inputs = std::vector<torch_tensorrt::Input>{trt_inputs_ivalues[0].toTensor()};
auto spec = torch_tensorrt::ts::CompileSpec(inputs);
diff --git a/workspace/tests/cpp/test_collections.cpp b/tmp/changes.txt
index df2280b..829e82a 100644
--- a/workspace/tests/cpp/test_collections.cpp
+++ b/tmp/changes.txt
@@ -5,9 +5,7 @@
#include "torch/script.h"
#include "torch_tensorrt/torch_tensorrt.h"
-
TEST(CppAPITests, TestCollectionStandardTensorInput) {
-
std::string path = "tests/modules/standard_tensor_input_scripted.jit.pt";
torch::Tensor in0 = torch::randn({1, 3, 512, 512}, torch::kCUDA).to(torch::kHalf);
std::vector<at::Tensor> inputs;
@@ -24,7 +22,6 @@ TEST(CppAPITests, TestCollectionStandardTensorInput) {
mod.eval();
mod.to(torch::kCUDA);
-
std::vector<torch::jit::IValue> inputs_;
for (auto in : inputs) {
@@ -52,7 +49,6 @@ TEST(CppAPITests, TestCollectionStandardTensorInput) {
}
TEST(CppAPITests, TestCollectionTupleInput) {
-
std::string path = "tests/modules/tuple_input_scripted.jit.pt";
torch::Tensor in0 = torch::randn({1, 3, 512, 512}, torch::kCUDA).to(torch::kHalf);
@@ -78,14 +74,12 @@ TEST(CppAPITests, TestCollectionTupleInput) {
auto input_shape_ivalue = torch::jit::IValue(std::move(c10::make_intrusive<torch_tensorrt::Input>(input_shape)));
-
std::tuple<torch::jit::IValue, torch::jit::IValue> input_shape_tuple(input_shape_ivalue, input_shape_ivalue);
torch::jit::IValue complex_input_shape(input_shape_tuple);
std::tuple<torch::jit::IValue> input_tuple2(complex_input_shape);
torch::jit::IValue complex_input_shape2(input_tuple2);
-
auto compile_settings = torch_tensorrt::ts::CompileSpec(complex_input_shape2);
compile_settings.require_full_compilation = false;
compile_settings.min_block_size = 3;
@@ -100,9 +94,7 @@ TEST(CppAPITests, TestCollectionTupleInput) {
ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(out.toTensor(), trt_out.toTensor(), 1e-5));
}
-
TEST(CppAPITests, TestCollectionListInput) {
-
std::string path = "tests/modules/list_input_scripted.jit.pt";
torch::Tensor in0 = torch::randn({1, 3, 512, 512}, torch::kCUDA).to(torch::kHalf);
std::vector<at::Tensor> inputs;
@@ -118,7 +110,6 @@ TEST(CppAPITests, TestCollectionListInput) {
mod.eval();
mod.to(torch::kCUDA);
-
std::vector<torch::jit::IValue> inputs_;
for (auto in : inputs) {
@@ -134,7 +125,6 @@ TEST(CppAPITests, TestCollectionListInput) {
complex_inputs.push_back(input_list_ivalue);
-
auto out = mod.forward(complex_inputs);
LOG_DEBUG("Finish torchscirpt forward");
@@ -146,7 +136,6 @@ TEST(CppAPITests, TestCollectionListInput) {
list.push_back(input_shape_ivalue);
list.push_back(input_shape_ivalue);
-
torch::jit::IValue complex_input_shape(list);
std::tuple<torch::jit::IValue> input_tuple2(complex_input_shape);
torch::jit::IValue complex_input_shape2(input_tuple2);
@@ -166,9 +155,7 @@ TEST(CppAPITests, TestCollectionListInput) {
ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(out.toTensor(), trt_out.toTensor(), 1e-5));
}
-
TEST(CppAPITests, TestCollectionTupleInputOutput) {
-
std::string path = "tests/modules/tuple_input_output_scripted.jit.pt";
torch::Tensor in0 = torch::randn({1, 3, 512, 512}, torch::kCUDA).to(torch::kHalf);
@@ -183,7 +170,6 @@ TEST(CppAPITests, TestCollectionTupleInputOutput) {
mod.eval();
mod.to(torch::kCUDA);
-
std::vector<torch::jit::IValue> complex_inputs, complex_inputs_list;
std::tuple<torch::jit::IValue, torch::jit::IValue> input_tuple(in0, in0);
@@ -196,7 +182,6 @@ TEST(CppAPITests, TestCollectionTupleInputOutput) {
auto input_shape_ivalue = torch::jit::IValue(std::move(c10::make_intrusive<torch_tensorrt::Input>(input_shape)));
-
std::tuple<torch::jit::IValue, torch::jit::IValue> input_shape_tuple(input_shape_ivalue, input_shape_ivalue);
torch::jit::IValue complex_input_shape(input_shape_tuple);
@@ -217,13 +202,13 @@ TEST(CppAPITests, TestCollectionTupleInputOutput) {
LOG_DEBUG("Finish compile");
auto trt_out = trt_mod.forward(complex_inputs);
- ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(out.toTuple()->elements()[0].toTensor(), trt_out.toTuple()->elements()[0].toTensor(), 1e-5));
- ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(out.toTuple()->elements()[1].toTensor(), trt_out.toTuple()->elements()[1].toTensor(), 1e-5));
+ ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(
+ out.toTuple()->elements()[0].toTensor(), trt_out.toTuple()->elements()[0].toTensor(), 1e-5));
+ ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(
+ out.toTuple()->elements()[1].toTensor(), trt_out.toTuple()->elements()[1].toTensor(), 1e-5));
}
-
TEST(CppAPITests, TestCollectionListInputOutput) {
-
std::string path = "tests/modules/list_input_output_scripted.jit.pt";
torch::Tensor in0 = torch::randn({1, 3, 512, 512}, torch::kCUDA).to(torch::kHalf);
std::vector<at::Tensor> inputs;
@@ -239,7 +224,6 @@ TEST(CppAPITests, TestCollectionListInputOutput) {
mod.eval();
mod.to(torch::kCUDA);
-
std::vector<torch::jit::IValue> inputs_;
for (auto in : inputs) {
@@ -255,7 +239,6 @@ TEST(CppAPITests, TestCollectionListInputOutput) {
complex_inputs.push_back(input_list_ivalue);
-
auto out = mod.forward(complex_inputs);
LOG_DEBUG("Finish torchscirpt forward");
@@ -263,13 +246,11 @@ TEST(CppAPITests, TestCollectionListInputOutput) {
auto input_shape_ivalue = torch::jit::IValue(std::move(c10::make_intrusive<torch_tensorrt::Input>(input_shape)));
-
c10::TypePtr elementType = input_shape_ivalue.type();
auto list = c10::impl::GenericList(elementType);
list.push_back(input_shape_ivalue);
list.push_back(input_shape_ivalue);
-
torch::jit::IValue complex_input_shape(list);
std::tuple<torch::jit::IValue> input_tuple2(complex_input_shape);
torch::jit::IValue complex_input_shape2(input_tuple2);
@@ -288,13 +269,13 @@ TEST(CppAPITests, TestCollectionListInputOutput) {
LOG_DEBUG("Finish compile");
auto trt_out = trt_mod.forward(complex_inputs);
- ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(out.toList().vec()[0].toTensor(), trt_out.toList().vec()[0].toTensor(), 1e-5));
- ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(out.toList().vec()[1].toTensor(), trt_out.toList().vec()[1].toTensor(), 1e-5));
+ ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(
+ out.toList().vec()[0].toTensor(), trt_out.toList().vec()[0].toTensor(), 1e-5));
+ ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(
+ out.toList().vec()[1].toTensor(), trt_out.toList().vec()[1].toTensor(), 1e-5));
}
-
TEST(CppAPITests, TestCollectionComplexModel) {
-
std::string path = "tests/modules/list_input_tuple_output_scripted.jit.pt";
torch::Tensor in0 = torch::randn({1, 3, 512, 512}, torch::kCUDA).to(torch::kHalf);
std::vector<at::Tensor> inputs;
@@ -310,7 +291,6 @@ TEST(CppAPITests, TestCollectionComplexModel) {
mod.eval();
mod.to(torch::kCUDA);
-
std::vector<torch::jit::IValue> inputs_;
for (auto in : inputs) {
@@ -326,7 +306,6 @@ TEST(CppAPITests, TestCollectionComplexModel) {
complex_inputs.push_back(input_list_ivalue);
-
auto out = mod.forward(complex_inputs);
LOG_DEBUG("Finish torchscirpt forward");
@@ -339,7 +318,6 @@ TEST(CppAPITests, TestCollectionComplexModel) {
list.push_back(input_shape_ivalue);
list.push_back(input_shape_ivalue);
-
torch::jit::IValue complex_input_shape(list);
std::tuple<torch::jit::IValue> input_tuple2(complex_input_shape);
torch::jit::IValue complex_input_shape2(input_tuple2);
@@ -358,6 +336,8 @@ TEST(CppAPITests, TestCollectionComplexModel) {
LOG_DEBUG("Finish compile");
auto trt_out = trt_mod.forward(complex_inputs);
- ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(out.toTuple()->elements()[0].toTensor(), trt_out.toTuple()->elements()[0].toTensor(), 1e-5));
- ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(out.toTuple()->elements()[1].toTensor(), trt_out.toTuple()->elements()[1].toTensor(), 1e-5));
+ ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(
+ out.toTuple()->elements()[0].toTensor(), trt_out.toTuple()->elements()[0].toTensor(), 1e-5));
+ ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(
+ out.toTuple()->elements()[1].toTensor(), trt_out.toTuple()->elements()[1].toTensor(), 1e-5));
}
\ No newline at end of file
diff --git a/workspace/cpp/bin/torchtrtc/main.cpp b/tmp/changes.txt
index 6c207d7..51ec2c5 100644
--- a/workspace/cpp/bin/torchtrtc/main.cpp
+++ b/tmp/changes.txt
@@ -117,8 +117,7 @@ int main(int argc, char** argv) {
parser, "num_iters", "Number of averaging timing iterations used to select kernels", {"num-avg-timing-iters"});
args::ValueFlag<uint64_t> workspace_size(
parser, "workspace_size", "Maximum size of workspace given to TensorRT", {"workspace-size"});
- args::ValueFlag<uint64_t> dla_sram_size(
- parser, "dla_sram_size", "DLA managed SRAM size", {"dla-sram-size"});
+ args::ValueFlag<uint64_t> dla_sram_size(parser, "dla_sram_size", "DLA managed SRAM size", {"dla-sram-size"});
args::ValueFlag<uint64_t> dla_local_dram_size(
parser, "dla_local_dram_size", "DLA Local DRAM size", {"dla-local-dram-size"});
args::ValueFlag<uint64_t> dla_global_dram_size(
diff --git a/workspace/cpp/src/compile_spec.cpp b/tmp/changes.txt
index 1fb4c56..432b070 100644
--- a/workspace/cpp/src/compile_spec.cpp
+++ b/tmp/changes.txt
@@ -29,40 +29,38 @@ CompileSpec::CompileSpec(std::vector<std::vector<int64_t>> fixed_sizes) {
}
CompileSpec::CompileSpec(std::vector<Input> inputs) {
- graph_inputs.inputs = std::move(inputs);
+ graph_inputs.inputs = std::move(inputs);
}
CompileSpec::CompileSpec(torch::jit::IValue input_signature) {
- graph_inputs.input_signature = input_signature;
+ graph_inputs.input_signature = input_signature;
}
-
-
void to_internal_input_signature(torch::jit::IValue input_ivalue, torch::jit::IValue& converted_ivalue) {
- if (input_ivalue.isTuple()) {
- auto input_tuple = input_ivalue.toTuple();
- std::vector<torch::jit::IValue> converted_elements;
- for (auto item: input_tuple->elements()) {
- torch::jit::IValue converted_item;
- to_internal_input_signature(item, converted_item);
- converted_elements.push_back(converted_item);
- auto tuple_ptr = c10::ivalue::Tuple::create(converted_elements);
- converted_ivalue = torch::jit::IValue(tuple_ptr);
- }
- } else if(input_ivalue.isList()) {
- auto input_list = input_ivalue.toList().vec();
- c10::TypePtr type = input_list[0].type();
- auto converted_elements = c10::impl::GenericList(type);
- for (auto item: input_list) {
- torch::jit::IValue converted_item;
- to_internal_input_signature(item, converted_item);
- converted_elements.push_back(converted_item);
- }
- converted_ivalue = torch::jit::IValue(converted_elements);
- } else if(input_ivalue.isCustomClass()) {
- torchtrt::core::ir::Input cur_input = to_internal_input(*(input_ivalue.toCustomClass<torchtrt::Input>()));
- converted_ivalue = torch::jit::IValue(std::move(c10::make_intrusive<torch_tensorrt::core::ir::Input>(cur_input)));
+ if (input_ivalue.isTuple()) {
+ auto input_tuple = input_ivalue.toTuple();
+ std::vector<torch::jit::IValue> converted_elements;
+ for (auto item : input_tuple->elements()) {
+ torch::jit::IValue converted_item;
+ to_internal_input_signature(item, converted_item);
+ converted_elements.push_back(converted_item);
+ auto tuple_ptr = c10::ivalue::Tuple::create(converted_elements);
+ converted_ivalue = torch::jit::IValue(tuple_ptr);
}
+ } else if (input_ivalue.isList()) {
+ auto input_list = input_ivalue.toList().vec();
+ c10::TypePtr type = input_list[0].type();
+ auto converted_elements = c10::impl::GenericList(type);
+ for (auto item : input_list) {
+ torch::jit::IValue converted_item;
+ to_internal_input_signature(item, converted_item);
+ converted_elements.push_back(converted_item);
+ }
+ converted_ivalue = torch::jit::IValue(converted_elements);
+ } else if (input_ivalue.isCustomClass()) {
+ torchtrt::core::ir::Input cur_input = to_internal_input(*(input_ivalue.toCustomClass<torchtrt::Input>()));
+ converted_ivalue = torch::jit::IValue(std::move(c10::make_intrusive<torch_tensorrt::core::ir::Input>(cur_input)));
+ }
}
torchtrt::core::CompileSpec init_compile_spec(CompileSpec external) {
diff --git a/workspace/cpp/src/torch_tensorrt.cpp b/tmp/changes.txt
index 9381319..22855ae 100644
--- a/workspace/cpp/src/torch_tensorrt.cpp
+++ b/tmp/changes.txt
@@ -53,6 +53,5 @@ void set_device(const int gpu_id) {
torch_tensorrt::core::set_device(gpu_id);
}
-static auto tensorrt_input_container =
- torch::class_<Input>("_torch_tensorrt", "Input").def(torch::init<>());
+static auto tensorrt_input_container = torch::class_<Input>("_torch_tensorrt", "Input").def(torch::init<>());
} // namespace torch_tensorrt
diff --git a/workspace/cpp/include/torch_tensorrt/torch_tensorrt.h b/tmp/changes.txt
index 11dc5d7..6a7035e 100644
--- a/workspace/cpp/include/torch_tensorrt/torch_tensorrt.h
+++ b/tmp/changes.txt
@@ -364,7 +364,7 @@ class TORCHTRT_API TensorFormat {
* signifying a static input shape or a set of three input shapes representing
* the min, optiminal and max input shapes allowed for the engine.
*/
-struct TORCHTRT_API Input : torch::CustomClassHolder{
+struct TORCHTRT_API Input : torch::CustomClassHolder {
/// Minimum acceptable input size into the engine
std::vector<int64_t> min_shape;
/// Optimal input size into the engine (size optimized for given kernels accept any size in min max range)
@@ -520,7 +520,7 @@ struct TORCHTRT_API Input : torch::CustomClassHolder{
* This struct can either hold a complex inputs of shape or a flattened one,
*/
struct TORCHTRT_API GraphInputs {
- torch::jit::IValue input_signature; // nested Input, full input spec
+ torch::jit::IValue input_signature; // nested Input, full input spec
std::vector<Input> inputs; // flatten input spec
};
ERROR: Some files do not conform to style guidelines
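For context on what the reformatted tests/cpp/test_collections.cpp above exercises, here is a minimal sketch of compiling a TorchScript module whose forward() takes a tuple of two tensors through the nested input-signature path. The module path, shape, and precision settings are lifted from the tuple-input test shown in the diff and should be treated as illustrative, not as the exact test code:

#include "torch/script.h"
#include "torch_tensorrt/torch_tensorrt.h"

int main() {
  // Load a scripted module whose forward() signature is forward((Tensor, Tensor))
  torch::jit::Module mod = torch::jit::load("tests/modules/tuple_input_scripted.jit.pt");
  mod.eval();
  mod.to(torch::kCUDA);

  // Describe each tensor in the tuple with a torch_tensorrt::Input wrapped in an IValue
  std::vector<int64_t> shape = {1, 3, 512, 512};
  auto input_shape = torch_tensorrt::Input(shape, torch_tensorrt::DataType::kHalf);
  auto input_shape_ivalue = torch::jit::IValue(c10::make_intrusive<torch_tensorrt::Input>(input_shape));

  // Nest the Input IValues the same way the module nests its arguments: ((Input, Input),)
  std::tuple<torch::jit::IValue, torch::jit::IValue> input_shape_tuple(input_shape_ivalue, input_shape_ivalue);
  torch::jit::IValue complex_input_shape(input_shape_tuple);
  std::tuple<torch::jit::IValue> input_tuple2(complex_input_shape);
  torch::jit::IValue input_signature(input_tuple2);

  // The nested signature drives CompileSpec; partial compilation stays enabled so the
  // collection handling can fall back to Torch where needed
  auto compile_settings = torch_tensorrt::ts::CompileSpec(input_signature);
  compile_settings.require_full_compilation = false;
  compile_settings.min_block_size = 3;
  compile_settings.enabled_precisions = {torch::kHalf};

  auto trt_mod = torch_tensorrt::ts::compile(mod, compile_settings);
  return 0;
}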
Signed-off-by: Bo Wang <bowa@nvidia.com>
Description
Please include a summary of the change and which issue is fixed. Please also include relevant motivation and context. List any dependencies that are required for this change.
Fixes # (issue)
Type of change
Please delete options that are not relevant and/or add your own.
Checklist: