[torch] Add folder for torch.aten.*.Scalar comparisons (llvm#3000)

This adds folders for small instances of the tensor-scalar comparison operators, as these are commonly used in shape computations. The folded operators are le, lt, ge, gt, eq, and ne.
iree-org · Mar 8, 2024 · 0723584 · 0723584
1 parent 80c7bc3
commit 0723584
Show file tree

Hide file tree

Showing 8 changed files with 889 additions and 591 deletions.
diff --git a/include/torch-mlir/Dialect/Torch/IR/GeneratedTorchOps.td b/include/torch-mlir/Dialect/Torch/IR/GeneratedTorchOps.td
diff --git a/lib/Conversion/TorchOnnxToTorch/DefaultDomainGtoP.cpp b/lib/Conversion/TorchOnnxToTorch/DefaultDomainGtoP.cpp
@@ -591,7 +591,7 @@ void mlir::torch::onnx_c::populateDefaultDomainGtoP(
         Value one = rewriter.create<Torch::ConstantIntOp>(
             loc, intTy, rewriter.getI64IntegerAttr(1));
         Value lt =
-            rewriter.create<Torch::AtenLeScalarOp>(loc, boolTy, indices, zero);
+            rewriter.create<Torch::AtenLtScalarOp>(loc, boolTy, indices, zero);
         Value dim =
             rewriter.create<Torch::AtenSizeIntOp>(loc, intTy, data, index);
         Value add = rewriter.create<Torch::AtenAddScalarOp>(loc, indicesTy,

diff --git a/lib/Dialect/Torch/IR/TorchOps.cpp b/lib/Dialect/Torch/IR/TorchOps.cpp
@@ -1481,6 +1481,197 @@ OpFoldResult AtenEqTensorOp::fold(FoldAdaptor adaptor) {
   return nullptr;
 }
 
+//===----------------------------------------------------------------------===//
+// AtenLeScalarOp
+//===----------------------------------------------------------------------===//
+
+using ComparisonFoldFpOperator = std::function<bool(double, double)>;
+using ComparisonFoldIntOperator = std::function<bool(APInt, APInt, bool)>;
+
+/// Constant-folds an elementwise comparison between a constant tensor `lhs`
+/// and a constant scalar `rhs` into a dense attribute of the result type.
+///
+/// \param lhs       Constant tensor operand; a null attribute means "not
+///                  constant" and disables the fold.
+/// \param rhs       Constant scalar operand (`IntegerAttr` or `FloatAttr`).
+/// \param resultTy  Result tensor type; must have fully static sizes and a
+///                  known dtype.
+/// \param fpFolder  Predicate applied to (tensor element, scalar) after both
+///                  are converted to `double`.
+/// \param intFolder Predicate applied to (tensor element, scalar, isUnsigned)
+///                  after the element is widened to the scalar's bit width.
+/// \returns The folded attribute, or nullptr when the fold does not apply.
+///
+/// Splat tensors are folded regardless of size; other tensors are folded only
+/// when the result has at most `kMaxFold` elements. Only same-category
+/// comparisons are folded (integer tensor with integer scalar, float tensor
+/// with float scalar); mixed comparisons are left to later lowering.
+static OpFoldResult comparisonScaleFolder(DenseElementsAttr lhs, Attribute rhs,
+                                          ValueTensorType resultTy,
+                                          ComparisonFoldFpOperator fpFolder,
+                                          ComparisonFoldIntOperator intFolder) {
+  constexpr int64_t kMaxFold = 16;
+  if (!lhs || !rhs || !resultTy)
+    return nullptr;
+  if (!resultTy.hasSizes() || !resultTy.hasDtype())
+    return nullptr;
+
+  for (auto size : resultTy.getSizes())
+    if (size == Torch::kUnknownSize)
+      return nullptr;
+
+  auto ctx = lhs.getContext();
+  auto resultETy = resultTy.getDtype();
+  auto tensorETy = cast<RankedTensorType>(lhs.getType()).getElementType();
+
+  auto intAttr = dyn_cast<IntegerAttr>(rhs);
+  auto floatAttr = dyn_cast<FloatAttr>(rhs);
+  if (!intAttr && !floatAttr)
+    return nullptr;
+
+  // The element accessors below assert when the tensor's element type does not
+  // match the requested value type, so refuse to fold mixed int/float
+  // comparisons instead of crashing on them.
+  auto tensorIntTy = dyn_cast<IntegerType>(tensorETy);
+  if (intAttr && !tensorIntTy)
+    return nullptr;
+  if (floatAttr && !isa<FloatType>(tensorETy))
+    return nullptr;
+
+  const bool isSplat = lhs.isSplat();
+  if (!isSplat) {
+    int64_t count = 1;
+    for (auto size : resultTy.getSizes())
+      count *= size;
+
+    if (count > kMaxFold)
+      return nullptr;
+
+    // One result value is produced per element of `lhs`; a constant whose
+    // element count disagrees with the result shape cannot be folded.
+    if (lhs.getNumElements() != count)
+      return nullptr;
+  }
+
+  auto foldedTy = resultTy.toBuiltinTensor().clone(resultETy);
+
+  if (intAttr) {
+    const bool unsign = tensorIntTy.isUnsigned();
+    const APInt scalarAP = intAttr.getValue();
+
+    // Widen the element to the scalar's bit width so both APInts can be
+    // compared directly. Every element type that is not explicitly unsigned
+    // is sign-extended.
+    // NOTE(review): signless i1 elements are sign-extended too, so a `true`
+    // element widens to -1 - confirm this is the intended value when bool
+    // tensors are compared against integer scalars.
+    auto compare = [&](const APInt &tensorAP) -> bool {
+      APInt widened(scalarAP.getBitWidth(),
+                    unsign ? tensorAP.getZExtValue() : tensorAP.getSExtValue(),
+                    /*isSigned=*/!unsign);
+      return intFolder(widened, scalarAP, unsign);
+    };
+
+    if (isSplat) {
+      bool resultBool = compare(lhs.getSplatValue<IntegerAttr>().getValue());
+      auto resultAP = IntegerAttr::get(IntegerType::get(ctx, 1), resultBool);
+      return DenseElementsAttr::get(foldedTy, resultAP);
+    }
+
+    llvm::SmallVector<bool> values;
+    for (APInt tensorAP : lhs.getValues<APInt>())
+      values.push_back(compare(tensorAP));
+    return DenseElementsAttr::get(foldedTy, values);
+  }
+
+  // Float scalar against a float tensor: compare in double precision.
+  const double scalar = floatAttr.getValue().convertToDouble();
+  auto compare = [&](const APFloat &tensorAP) -> bool {
+    return fpFolder(tensorAP.convertToDouble(), scalar);
+  };
+
+  if (isSplat) {
+    bool resultBool = compare(lhs.getSplatValue<FloatAttr>().getValue());
+    auto resultAP = IntegerAttr::get(IntegerType::get(ctx, 1), resultBool);
+    return DenseElementsAttr::get(foldedTy, resultAP);
+  }
+
+  llvm::SmallVector<bool> values;
+  for (APFloat tensorAP : lhs.getValues<APFloat>())
+    values.push_back(compare(tensorAP));
+  return DenseElementsAttr::get(foldedTy, values);
+}
+
+// Folds `aten.le.Scalar` by handing the constant operands to the shared
+// tensor-vs-scalar comparison helper together with `<=` predicates.
+OpFoldResult AtenLeScalarOp::fold(FoldAdaptor adaptor) {
+  ComparisonFoldFpOperator floatLe = [](double a, double b) -> bool {
+    return a <= b;
+  };
+
+  // The flag selects the unsigned APInt predicate over the signed one.
+  ComparisonFoldIntOperator integerLe = [](APInt a, APInt b,
+                                           bool isUnsigned) -> bool {
+    if (isUnsigned)
+      return a.ule(b);
+    return a.sle(b);
+  };
+
+  return comparisonScaleFolder(
+      dyn_cast_or_null<DenseElementsAttr>(adaptor.getSelf()),
+      adaptor.getOther(), dyn_cast<ValueTensorType>(getType()), floatLe,
+      integerLe);
+}
+
+//===----------------------------------------------------------------------===//
+// AtenLtScalarOp
+//===----------------------------------------------------------------------===//
+
+// Folds `aten.lt.Scalar` by handing the constant operands to the shared
+// tensor-vs-scalar comparison helper together with `<` predicates.
+OpFoldResult AtenLtScalarOp::fold(FoldAdaptor adaptor) {
+  ComparisonFoldFpOperator floatLt = [](double a, double b) -> bool {
+    return a < b;
+  };
+
+  // The flag selects the unsigned APInt predicate over the signed one.
+  ComparisonFoldIntOperator integerLt = [](APInt a, APInt b,
+                                           bool isUnsigned) -> bool {
+    if (isUnsigned)
+      return a.ult(b);
+    return a.slt(b);
+  };
+
+  return comparisonScaleFolder(
+      dyn_cast_or_null<DenseElementsAttr>(adaptor.getSelf()),
+      adaptor.getOther(), dyn_cast<ValueTensorType>(getType()), floatLt,
+      integerLt);
+}
+
+//===----------------------------------------------------------------------===//
+// AtenGtScalarOp
+//===----------------------------------------------------------------------===//
+
+// Folds `aten.gt.Scalar` by handing the constant operands to the shared
+// tensor-vs-scalar comparison helper together with `>` predicates.
+OpFoldResult AtenGtScalarOp::fold(FoldAdaptor adaptor) {
+  ComparisonFoldFpOperator floatGt = [](double a, double b) -> bool {
+    return a > b;
+  };
+
+  // The flag selects the unsigned APInt predicate over the signed one.
+  ComparisonFoldIntOperator integerGt = [](APInt a, APInt b,
+                                           bool isUnsigned) -> bool {
+    if (isUnsigned)
+      return a.ugt(b);
+    return a.sgt(b);
+  };
+
+  return comparisonScaleFolder(
+      dyn_cast_or_null<DenseElementsAttr>(adaptor.getSelf()),
+      adaptor.getOther(), dyn_cast<ValueTensorType>(getType()), floatGt,
+      integerGt);
+}
+
+//===----------------------------------------------------------------------===//
+// AtenGeScalarOp
+//===----------------------------------------------------------------------===//
+
+// Folds `aten.ge.Scalar` by handing the constant operands to the shared
+// tensor-vs-scalar comparison helper together with `>=` predicates.
+OpFoldResult AtenGeScalarOp::fold(FoldAdaptor adaptor) {
+  ComparisonFoldFpOperator floatGe = [](double a, double b) -> bool {
+    return a >= b;
+  };
+
+  // The flag selects the unsigned APInt predicate over the signed one.
+  ComparisonFoldIntOperator integerGe = [](APInt a, APInt b,
+                                           bool isUnsigned) -> bool {
+    if (isUnsigned)
+      return a.uge(b);
+    return a.sge(b);
+  };
+
+  return comparisonScaleFolder(
+      dyn_cast_or_null<DenseElementsAttr>(adaptor.getSelf()),
+      adaptor.getOther(), dyn_cast<ValueTensorType>(getType()), floatGe,
+      integerGe);
+}
+
+//===----------------------------------------------------------------------===//
+// AtenEqScalarOp
+//===----------------------------------------------------------------------===//
+
+// Folds `aten.eq.Scalar` by handing the constant operands to the shared
+// tensor-vs-scalar comparison helper together with equality predicates.
+OpFoldResult AtenEqScalarOp::fold(FoldAdaptor adaptor) {
+  ComparisonFoldFpOperator floatEq = [](double a, double b) -> bool {
+    return a == b;
+  };
+
+  // The signedness flag is ignored: the integer predicate is the same
+  // `APInt::eq` call either way.
+  ComparisonFoldIntOperator integerEq = [](APInt a, APInt b,
+                                           bool /*unsign*/) -> bool {
+    return a.eq(b);
+  };
+
+  return comparisonScaleFolder(
+      dyn_cast_or_null<DenseElementsAttr>(adaptor.getSelf()),
+      adaptor.getOther(), dyn_cast<ValueTensorType>(getType()), floatEq,
+      integerEq);
+}
+
+//===----------------------------------------------------------------------===//
+// AtenNeScalarOp
+//===----------------------------------------------------------------------===//
+
+// Folds `aten.ne.Scalar` by handing the constant operands to the shared
+// tensor-vs-scalar comparison helper together with inequality predicates.
+OpFoldResult AtenNeScalarOp::fold(FoldAdaptor adaptor) {
+  ComparisonFoldFpOperator floatNe = [](double a, double b) -> bool {
+    return a != b;
+  };
+
+  // The signedness flag is ignored: the integer predicate is the same
+  // `APInt::ne` call either way.
+  ComparisonFoldIntOperator integerNe = [](APInt a, APInt b,
+                                           bool /*unsign*/) -> bool {
+    return a.ne(b);
+  };
+
+  return comparisonScaleFolder(
+      dyn_cast_or_null<DenseElementsAttr>(adaptor.getSelf()),
+      adaptor.getOther(), dyn_cast<ValueTensorType>(getType()), floatNe,
+      integerNe);
+}
+
 //===----------------------------------------------------------------------===//
 // AtenFloorOp
 //===----------------------------------------------------------------------===//

diff --git a/projects/pt1/e2e_testing/xfail_sets.py b/projects/pt1/e2e_testing/xfail_sets.py
@@ -1495,11 +1495,6 @@
     "FlipNegativeIndexModule_basic",
     "HardsigmoidModule_basic",
     "HardsigmoidRandomModule_basic",
-    "IndexSelectDynamicInputSizeModule_basic",
-    "IndexSelectWholeDimensionModule_basic",
-    "IndexSelectWholeTensorModule_basic",
-    "IndexTensorStaticModule_basic",
-    "IndexTensorStaticNonContiguousWithNoneModule_basic",
     "PixelShuffleModuleStaticRank4Float32_basic",
     "ResNet18Module_basic",
     "SliceCopyEndGreaterThanDimSize_Module_basic",
@@ -1998,24 +1993,15 @@
     "NativeDropoutTrainModule_basic",
     "NativeDropoutTrainStaticShapeModule_basic",
     "ReduceProdDimIntFloatModule_basic",
-    "StdCorrectionAllDimReduceModule_basic",
-    "StdCorrectionKeepDimModule_basic",
     "StdCorrectionLargeInputModule_basic",
     "StdCorrectionModule_basic",
     "StdCorrectionNoneModule_basic",
     "StdDimNoneDimModule_basic",
     "StdUnbiasedModule_basic",
-    "VarCorrectionAllDimReduceModule_basic",
-    "VarCorrectionKeepDimModule_basic",
     "VarCorrectionLargeInputModule_basic",
     "VarCorrectionModule_basic",
     "VarCorrectionNoneModule_basic",
-    "VarDimAllDimReduceModule_basic",
-    "VarDimModule_basic",
-    "VarDimMultiDimModule_basic",
     "VarDimNoneDimModule_basic",
-    "VarDimSingleDimModule_basic",
-    "VarDimUnbiasedModule_basic",
     "VarMeanCorrectionNoneModule_basic",
     "VarMeanUnbiasedModule_basic",
     "VarUnbiasedModule_basic",
@@ -2110,9 +2096,6 @@
     "IndexTensorMultiInputOneDim_basic",
     "IndexTensorMultiInputThreeIndexers_basic",
     "IndexTensorMultiInput_basic",
-    "IndexTensorStaticContiguousWithNoneModule_basic",
-    "SelectIntModule_basic",
-    "SliceSingleIdxModule_basic",
     "ViewFlattenAndExpandModule_basic",
     "ViewSizeDimFollowedByCollapsedOnesModule_basic",
     "ViewSizeDimFollowedByExpandedOnesModule_basic",
@@ -2151,7 +2134,6 @@
     "FlattenDynamicModule_basic",
     "GluStaticModule_basic",
     "GroupNormModule_basic",
-    "IndexSelectDynamicModulebasic",
     "IndexTensorHackedTwinModule3dInput_basic",
     "IndexTensorHackedTwinModule_basic",
     "IndexTensorModule3dInput_basic",
@@ -2169,11 +2151,5 @@
     "TensorsStackPromoteDTypeModule_basic",
 }
 
-if torch_version_for_comparison() < version.parse("2.3.0.dev"):
-    ONNX_XFAIL_SET = ONNX_XFAIL_SET | {
-        # ERROR: dtype (torch.float64) is not equal to golden dtype (torch.float32)
-        "ElementwiseWhereScalarModule_basic",
-    }
-
 ONNX_CRASHING_SET = { }
 
diff --git a/projects/pt1/python/torch_mlir/jit_ir_importer/build_tools/torch_ods_gen.py b/projects/pt1/python/torch_mlir/jit_ir_importer/build_tools/torch_ods_gen.py
@@ -301,12 +301,6 @@ def emit_with_mutating_variants(key, **kwargs):
             "aten::le.Tensor : (Tensor, Tensor) -> (Tensor)",
             "aten::ne.Tensor : (Tensor, Tensor) -> (Tensor)",
             "aten::div.Scalar : (Tensor, Scalar) -> (Tensor)",
-            "aten::ne.Scalar : (Tensor, Scalar) -> (Tensor)",
-            "aten::eq.Scalar : (Tensor, Scalar) -> (Tensor)",
-            "aten::gt.Scalar : (Tensor, Scalar) -> (Tensor)",
-            "aten::ge.Scalar : (Tensor, Scalar) -> (Tensor)",
-            "aten::lt.Scalar : (Tensor, Scalar) -> (Tensor)",
-            "aten::le.Scalar : (Tensor, Scalar) -> (Tensor)",
             "aten::fmod.Scalar : (Tensor, Scalar) -> (Tensor)",
             "aten::masked_fill.Scalar : (Tensor, Tensor, Scalar) -> (Tensor)",
             "aten::clamp : (Tensor, Scalar?, Scalar?) -> (Tensor)",
@@ -347,6 +341,12 @@ def emit_with_mutating_variants(key, **kwargs):
     emit_with_mutating_variants("aten::sub.Scalar : (Tensor, Scalar, Scalar) -> (Tensor)", has_canonicalizer=True)
     emit_with_mutating_variants("aten::mul.Scalar : (Tensor, Scalar) -> (Tensor)", has_canonicalizer=True)
     emit_with_mutating_variants("aten::eq.Tensor : (Tensor, Tensor) -> (Tensor)", has_folder=True)
+    emit_with_mutating_variants("aten::le.Scalar : (Tensor, Scalar) -> (Tensor)", has_folder=True)
+    emit_with_mutating_variants("aten::lt.Scalar : (Tensor, Scalar) -> (Tensor)", has_folder=True)
+    emit_with_mutating_variants("aten::gt.Scalar : (Tensor, Scalar) -> (Tensor)", has_folder=True)
+    emit_with_mutating_variants("aten::ge.Scalar : (Tensor, Scalar) -> (Tensor)", has_folder=True)
+    emit_with_mutating_variants("aten::eq.Scalar : (Tensor, Scalar) -> (Tensor)", has_folder=True)
+    emit_with_mutating_variants("aten::ne.Scalar : (Tensor, Scalar) -> (Tensor)", has_folder=True)
     emit_with_mutating_variants("aten::floor : (Tensor) -> (Tensor)", has_canonicalizer=True)
     emit_with_mutating_variants("aten::masked_fill.Tensor : (Tensor, Tensor, Tensor) -> (Tensor)", has_canonicalizer=True)
 

diff --git a/projects/pt1/python/torch_mlir_e2e_test/test_suite/elementwise.py b/projects/pt1/python/torch_mlir_e2e_test/test_suite/elementwise.py
@@ -413,7 +413,7 @@ def __init__(self):
         ([-1, -1, -1], torch.float32, True),
     ])
     def forward(self, a):
-        return torch.where(a > 0.5, 4.0, 8.0)
+        return torch.where(a > 0.5, 4.0, 8.0).to(torch.float)
 
 
 @register_test_case(module_factory=lambda: ElementwiseWhereScalarModule())

diff --git a/test/Conversion/TorchOnnxToTorch/simple_ops_g_to_p.mlir b/test/Conversion/TorchOnnxToTorch/simple_ops_g_to_p.mlir
@@ -42,7 +42,7 @@ func.func @test_gather_nd(%arg0: !torch.vtensor<[3,4,5],f32>, %arg1: !torch.vten
   // CHECK: %[[AXIS:.+]] = torch.constant.int 0
   // CHECK: %[[ZERO:.+]] = torch.constant.int 0
   // CHECK: %[[ONE:.+]] = torch.constant.int 1
-  // CHECK: %[[LT:.+]] = torch.aten.le.Scalar %arg1, %[[ZERO]]
+  // CHECK: %[[LT:.+]] = torch.aten.lt.Scalar %arg1, %[[ZERO]]
   // CHECK: %[[SZ:.+]] = torch.aten.size.int %arg0, %[[AXIS]]
   // CHECK: %[[ADD:.+]] = torch.aten.add.Scalar %arg1, %[[SZ]], %[[ONE]]
   // CHECK: %[[SEL:.+]] = torch.aten.where.self %[[LT]], %[[ADD]], %arg1
@@ -72,7 +72,7 @@ func.func @test_gather_scalar(%arg0: !torch.vtensor<[3,4,5],f32>, %arg1: !torch.
   // CHECK: %[[AXIS:.+]] = torch.constant.int 0
   // CHECK: %[[ZERO:.+]] = torch.constant.int 0
   // CHECK: %[[ONE:.+]] = torch.constant.int 1
-  // CHECK: %[[LT:.+]] = torch.aten.le.Scalar %arg1, %[[ZERO]]
+  // CHECK: %[[LT:.+]] = torch.aten.lt.Scalar %arg1, %[[ZERO]]
   // CHECK: %[[SZ:.+]] = torch.aten.size.int %arg0, %[[AXIS]]
   // CHECK: %[[ADD:.+]] = torch.aten.add.Scalar %arg1, %[[SZ]], %[[ONE]]
   // CHECK: %[[SEL:.+]] = torch.aten.where.self %[[LT]], %[[ADD]], %arg1