Bump LLVM to llvm/llvm-project@0cb024b and deps (#16114)
- Revert llvm/llvm-project@af78e5d locally. We need to bump tf-nightly to drop the revert. #16173
- Disable Tracy capture for CUDA benchmarking because it is not progressing. #16157
- Apply a [fix](a863467) for SME compilation failures.
- Add fixes for llvm/llvm-project@0a8e3dd (the `hasTensorSemantics()`/`hasBufferSemantics()` renames seen throughout this diff; see the sketch below).

Closes #16063 because llvm/llvm-project@974ded9 fixes the issue; that commit is carried in the integrate.
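Most of the code changes in this integrate are mechanical renames required by the upstream `DestinationStyleOpInterface` update: `hasTensorSemantics()`/`hasBufferSemantics()` become `hasPureTensorSemantics()`/`hasPureBufferSemantics()`. A minimal sketch of the pattern, assuming a standard MLIR build with the Linalg dialect; the helper function and its name below are illustrative, not part of this commit:

```cpp
// Illustrative sketch only: shows the renamed DestinationStyleOpInterface
// queries that this integrate switches IREE over to. Only the hasPure* calls
// reflect the upstream API; the wrapper function is hypothetical.
#include "mlir/Dialect/Linalg/IR/Linalg.h"
#include "mlir/Support/LogicalResult.h"

namespace example {

// Succeeds only for ops whose operands and results are all tensors.
static mlir::LogicalResult requirePureTensorOp(mlir::linalg::LinalgOp linalgOp) {
  // Old spelling: linalgOp.hasTensorSemantics()
  if (!linalgOp.hasPureTensorSemantics())
    return mlir::failure();
  // All-memref ops would instead satisfy linalgOp.hasPureBufferSemantics()
  // (old spelling: hasBufferSemantics()).
  return mlir::success();
}

} // namespace example
```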

---------

Co-authored-by: Benjamin Maxwell <benjamin.maxwell@arm.com>
Co-authored-by: Lei Zhang <antiagainst@gmail.com>
3 people authored Jan 22, 2024
1 parent 3998b2d commit d78f716
Showing 17 changed files with 37 additions and 40 deletions.
7 changes: 4 additions & 3 deletions build_tools/benchmarks/run_benchmarks.sh
@@ -42,15 +42,16 @@ if [[ "${TARGET_DEVICE_NAME}" == "a2-highgpu-1g" ]]; then
gcr.io/iree-oss/nvidia-bleeding-edge@sha256:2eb17e2e8e0d5d25600e667e3b7a71e8d39d9d12a99ec2ad6bdabbc8919db43b \
./build_tools/benchmarks/run_benchmarks_on_linux.py \
--normal_benchmark_tool_dir="${NORMAL_BENCHMARK_TOOLS_DIR}" \
- --traced_benchmark_tool_dir="${TRACED_BENCHMARK_TOOLS_DIR}" \
- --trace_capture_tool="${TRACY_CAPTURE_TOOL}" \
- --capture_tarball="${BENCHMARK_TRACES}" \
--e2e_test_artifacts_dir="${E2E_TEST_ARTIFACTS_DIR}" \
--execution_benchmark_config="${EXECUTION_BENCHMARK_CONFIG}" \
--target_device_name="${TARGET_DEVICE_NAME}" \
--shard_index="${SHARD_INDEX}" \
--output="${BENCHMARK_RESULTS}" \
--verbose
+ # TODO(#16157): Re-enable tracy capture after fixing unresponsiveness.
+ # --traced_benchmark_tool_dir="${TRACED_BENCHMARK_TOOLS_DIR}" \
+ # --trace_capture_tool="${TRACY_CAPTURE_TOOL}" \
+ # --capture_tarball="${BENCHMARK_TRACES}" \
elif [[ "${TARGET_DEVICE_NAME}" =~ ^(c2-standard-16|c2-standard-60)$ ]]; then
${DOCKER_WRAPPER} \
gcr.io/iree-oss/base-bleeding-edge@sha256:14200dacca3a0f3a66f8aa87c6f64729b83a2eeb403b689c24204074ad157418 \
@@ -238,7 +238,7 @@ getTiedOperandsForDPSOps(DestinationStyleOpInterface dpsOp,
/// same equivalence class.
static LogicalResult analyseDPSOps(DestinationStyleOpInterface dpsOp,
BufferizationPlan &plan) {
- if (!dpsOp.hasTensorSemantics())
+ if (!dpsOp.hasPureTensorSemantics())
return success();
auto results = dpsOp->getResults();
auto tiedOperands = getTiedOperandsForDPSOps(dpsOp, plan);
@@ -304,7 +304,7 @@ static FailureOr<Operation *> lowerOpWithEncoding(
ArrayRef<Value> operands, MaterializeEncodingFn materializeEncodingFn,
MaterializeEncodingValueFn) {
auto linalgOp = dyn_cast<linalg::LinalgOp>(op.getOperation());
- if (!linalgOp.hasTensorSemantics())
+ if (!linalgOp.hasPureTensorSemantics())
return failure();

auto inputs = linalgOp.getDpsInputOperands();
@@ -376,7 +376,7 @@ lowerOpWithEncoding(RewriterBase &rewriter, linalg::FillOp fillOp,
ValueRange convertedInputOperands,
ValueRange convertedOutputOperands, MaterializeEncodingFn,
MaterializeEncodingValueFn) {
- if (!fillOp.hasTensorSemantics())
+ if (!fillOp.hasPureTensorSemantics())
return failure();
Operation *materializedFillOp = rewriter.create<linalg::FillOp>(
fillOp.getLoc(), convertedOutputOperands[0].getType(),
@@ -429,7 +429,7 @@ lowerOpWithEncoding(RewriterBase &rewriter, linalg::GenericOp genericOp,
ValueRange convertedInputOperands,
ValueRange convertedOutputOperands, MaterializeEncodingFn,
MaterializeEncodingValueFn) {
- if (!genericOp.hasTensorSemantics() || !isElementwise(genericOp) ||
+ if (!genericOp.hasPureTensorSemantics() || !isElementwise(genericOp) ||
genericOp.getNumDpsInputs() != 1 || genericOp.getNumDpsInits() != 1) {
return rewriter.notifyMatchFailure(genericOp,
"linalg.generic op is not elementwise "
@@ -281,12 +281,12 @@ static LogicalResult bufferizeLinalgExtOp(RewriterBase &rewriter,
rewriter.setInsertionPoint(op);

// Nothing to do. This op is already bufferized.
- if (dspOp.hasBufferSemantics())
+ if (dspOp.hasPureBufferSemantics())
return success();

// Ensure op has only tensors. Allow mixed tensor-buffer mode on a per-need
// basis.
- if (!dspOp.hasTensorSemantics())
+ if (!dspOp.hasPureTensorSemantics())
return op->emitError() << "op does not have tensor semantics";

// New input operands for the cloned op.
8 changes: 0 additions & 8 deletions compiler/src/iree/compiler/Codegen/LLVMCPU/ConvertToLLVM.cpp
@@ -1034,13 +1034,6 @@ void ConvertToLLVMPass::runOnOperation() {
}

LLVMConversionTarget target(getContext());
- bool hasAArch64SME = isAArch64(targetAttr) && hasSMEFeature(targetAttr);
- if (hasAArch64SME) {
-   // Enable ArmSME to LLVM lowerings.
-   configureArmSMEToLLVMConversionLegality(target);
-   populateArmSMEToLLVMConversionPatterns(typeConverter, patterns);
- }

populateAffineToStdConversionPatterns(patterns);
populateSCFToControlFlowConversionPatterns(patterns);
cf::populateControlFlowToLLVMConversionPatterns(typeConverter, patterns);
@@ -1079,7 +1072,6 @@ void ConvertToLLVMPass::runOnOperation() {
target.addIllegalDialect<func::FuncDialect, mlir::arith::ArithDialect,
IREE::Util::UtilDialect, IREE::HAL::HALDialect,
math::MathDialect, tosa::TosaDialect>();
- target.addIllegalOp<UnrealizedConversionCastOp>();

if (failed(applyPartialConversion(module, target, std::move(patterns)))) {
signalPassFailure();
8 changes: 4 additions & 4 deletions compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp
@@ -209,7 +209,7 @@ getVectorPreProcStrategy(linalg::LinalgOp linalgOp) {
}

// Select a strategy based on heuristics.
- if (linalgOp.hasBufferSemantics()) {
+ if (linalgOp.hasPureBufferSemantics()) {
return VectorPreProcStrategy::None;
}

@@ -1586,7 +1586,7 @@ setDefaultGenericOpRootConfig(func::FuncOp entryPointFn,

// For non-tensor based ops use the Buffer ops pipeline.
DispatchLoweringPassPipeline passPipeline;
- if (genericOp.hasTensorSemantics()) {
+ if (genericOp.hasPureTensorSemantics()) {
passPipeline =
vecPreProcStrategy == VectorPreProcStrategy::Peeling
? DispatchLoweringPassPipeline::CPUDoubleTilingPeelingExpert
@@ -1687,7 +1687,7 @@ setTransposeLikeOpRootConfig(func::FuncOp entryPointFn,

// For non-tensor based ops use the Buffer ops pipeline.
auto passPipeline =
- genericOp.hasTensorSemantics()
+ genericOp.hasPureTensorSemantics()
? DispatchLoweringPassPipeline::CPUDoubleTilingExpert
: DispatchLoweringPassPipeline::CPUBufferOpsTileAndVectorize;
return setOpConfigAndEntryPointFnTranslation(entryPointFn, genericOp,
@@ -1770,7 +1770,7 @@ static LogicalResult setElementwiseGenericOpRootConfig(
<< "\n");

DispatchLoweringPassPipeline passPipeline;
- if (genericOp.hasBufferSemantics()) {
+ if (genericOp.hasPureBufferSemantics()) {
passPipeline = DispatchLoweringPassPipeline::CPUBufferOpsTileAndVectorize;
} else if (vecPreProcStrategy == VectorPreProcStrategy::Peeling) {
passPipeline = DispatchLoweringPassPipeline::CPUDoubleTilingPeelingExpert;
@@ -42,7 +42,7 @@ LogicalResult splitReductionPrecondition(Operation *op,
bool fpReductionReordering) {
linalg::LinalgOp linalgOp = cast<linalg::LinalgOp>(op);

- if (!linalgOp.hasTensorSemantics()) {
+ if (!linalgOp.hasPureTensorSemantics()) {
LLVM_DEBUG(llvm::dbgs() << "doesn't have tensor semantics\n");
return failure();
}
4 changes: 4 additions & 0 deletions compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.cpp
@@ -12,6 +12,7 @@
#include "iree/compiler/Codegen/LLVMCPU/Passes.h"
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Support/CommandLine.h"
#include "mlir/Conversion/ArmSMEToLLVM/ArmSMEToLLVM.h"
#include "mlir/Conversion/ArmSMEToSCF/ArmSMEToSCF.h"
#include "mlir/Conversion/ComplexToStandard/ComplexToStandard.h"
#include "mlir/Conversion/ReconcileUnrealizedCasts/ReconcileUnrealizedCasts.h"
@@ -674,6 +675,9 @@ static void addLowerToLLVMPasses(OpPassManager &passManager,
passManager.addNestedPass<func::FuncOp>(
createInstrumentMemoryAccessesPass());
}
+ if (enableAArch64SME) {
+   passManager.addPass(createConvertArmSMEToLLVMPass());
+ }
passManager.addPass(createConvertToLLVMPass(clEnableReassociateFpReductions));
passManager.addPass(createReconcileUnrealizedCastsPass());

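For the SME fix, the ArmSME-to-LLVM lowering moves out of IREE's ConvertToLLVMPass (the block deleted in ConvertToLLVM.cpp above) and into the pass pipeline as the dedicated upstream pass, as the Passes.cpp hunk shows. A rough sketch of the resulting ordering; the wrapper function name and the elided surrounding pipeline are illustrative, only the two addPass calls mirror the diff:

```cpp
// Sketch of the pass ordering after this change (wrapper is hypothetical).
#include "mlir/Conversion/ArmSMEToLLVM/ArmSMEToLLVM.h"
#include "mlir/Pass/PassManager.h"

static void addSMEThenLLVMLowering(mlir::OpPassManager &passManager,
                                   bool enableAArch64SME) {
  if (enableAArch64SME) {
    // ArmSME ops are lowered by the dedicated upstream pass first...
    passManager.addPass(mlir::createConvertArmSMEToLLVMPass());
  }
  // ...so the later ConvertToLLVM pass no longer registers ArmSME patterns
  // or configures ArmSME legality itself. (IREE's createConvertToLLVMPass
  // and createReconcileUnrealizedCastsPass would follow here.)
}
```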
4 changes: 2 additions & 2 deletions compiler/src/iree/compiler/Codegen/SPIRV/KernelConfig.cpp
@@ -846,7 +846,7 @@ bool isCooperativeMatrixFusable(linalg::GenericOp genericOp) {
}

bool needToPrmoteCForCooperativeMatrix(linalg::LinalgOp matmulOp) {
- assert(matmulOp.hasTensorSemantics());
+ assert(matmulOp.hasPureTensorSemantics());
Value result = matmulOp.getOperation()->getResult(0);
if (!result.hasOneUse())
return true; // Be conservative.
@@ -1485,7 +1485,7 @@ static LogicalResult setDefaultOpConfig(spirv::ResourceLimitsAttr limits,
bool vectorizable =
allowVectorization &&
// The vectorization pipeline assumes tensor semantics for tiling.
- linalgOp.hasTensorSemantics() && !linalgOp.hasIndexSemantics() &&
+ linalgOp.hasPureTensorSemantics() && !linalgOp.hasIndexSemantics() &&
// Require all affine maps to be projected permutation so that we can
// generate vector transfer ops.
llvm::all_of(
@@ -25,7 +25,7 @@ struct TransposeGenericOpPattern : public OpRewritePattern<linalg::GenericOp> {
using OpRewritePattern<linalg::GenericOp>::OpRewritePattern;
LogicalResult matchAndRewrite(linalg::GenericOp genericOp,
PatternRewriter &rewriter) const override {
- if (!genericOp.hasTensorSemantics()) {
+ if (!genericOp.hasPureTensorSemantics()) {
return rewriter.notifyMatchFailure(genericOp, "no tensor semantics");
}

@@ -39,12 +39,12 @@ struct DetachElementwisePattern
!isa<linalg::ConvolutionOpInterface>(*linalgOp)) {
return failure();
}
- if (!linalgOp.hasTensorSemantics())
+ if (!linalgOp.hasPureTensorSemantics())
return failure();

// Nothing to do if the output tensor operand is already a fill op.
SmallVector<OpOperand *> outputOperands;
- if (!linalgOp.hasBufferSemantics()) {
+ if (!linalgOp.hasPureBufferSemantics()) {
outputOperands = llvm::to_vector(
llvm::map_range(linalgOp.getDpsInitsMutable(),
[](OpOperand &opOperand) { return &opOperand; }));
@@ -135,7 +135,7 @@ std::optional<Value> matchGenericFill(linalg::LinalgOp linalgOp) {
/// operation.
static FailureOr<linalg::GenericOp>
raiseTensorExtractToInput(linalg::GenericOp linalgOp, RewriterBase &rewriter) {
- if (!linalgOp.hasTensorSemantics()) {
+ if (!linalgOp.hasPureTensorSemantics()) {
return failure();
}
if (!isElementwise(linalgOp)) {
@@ -317,7 +317,7 @@ tryRaiseToExtractSlice(AffineMap inputIndexingMap, AffineMap outputIndexingMap,
/// tensor, and tries to raise it to a view-like operation on the input tensor.
static FailureOr<Operation *> tryRaiseToView(linalg::GenericOp linalgOp,
RewriterBase &rewriter) {
- if (!linalgOp.hasTensorSemantics()) {
+ if (!linalgOp.hasPureTensorSemantics()) {
return failure();
}

@@ -277,7 +277,7 @@ struct setContractionOpEncoding
LogicalResult matchAndRewrite(linalg::ContractionOpInterface op,
PatternRewriter &rewriter) const override {
auto linalgOp = dyn_cast<linalg::LinalgOp>(op.getOperation());
- if (!linalgOp.hasTensorSemantics()) {
+ if (!linalgOp.hasPureTensorSemantics()) {
return failure();
}
if (getCompilationInfo(linalgOp)) {
@@ -29,7 +29,7 @@ void mlir::iree_compiler::Reducer::reduceLinalgOnTensorsDelta(
SmallVector<linalg::LinalgOp> linalgOps;
SmallVector<linalg::LinalgOp> keepOps;
module.walk([&](linalg::LinalgOp op) {
- if (!op.hasTensorSemantics())
+ if (!op.hasPureTensorSemantics())
return;
// Op should have at least one tensor input, otherwise the operation is
// already a fill-like operation.
@@ -51,7 +51,7 @@ splitReduction(PatternRewriter &b, linalg::LinalgOp op,
const linalg::ControlSplitReductionFn &controlSplitReductionFn,
const LinalgTransformationFilter &filter,
bool useAlloc = false) {
- if (failed(filter.checkAndNotify(b, op)) || !op.hasTensorSemantics() ||
+ if (failed(filter.checkAndNotify(b, op)) || !op.hasPureTensorSemantics() ||
op.getNumReductionLoops() != 1 || op.getNumDpsInits() != 1 ||
!op.hasOnlyProjectedPermutations())
return b.notifyMatchFailure(op, "precondition not met");
@@ -823,7 +823,7 @@ FftOp::getTiledImplementation(OpBuilder &builder,
for (auto out : getOutputs()) {
tiledOperands.push_back(
getSlice(builder, getLoc(), out, offsets, sizes, strides));
- if (hasTensorSemantics()) {
+ if (hasPureTensorSemantics()) {
resultTypes.push_back(tiledOperands.back().getType());
}
}
@@ -1037,7 +1037,7 @@ ScanOp::getTiledImplementation(OpBuilder &builder,
}

SmallVector<Type, 4> resultTypes;
- if (hasTensorSemantics()) {
+ if (hasPureTensorSemantics()) {
resultTypes.push_back(tiledOperands[1].getType());
resultTypes.push_back(tiledOperands[2].getType());
}
@@ -1182,7 +1182,7 @@ ReverseOp::getTiledImplementation(OpBuilder &builder,
getSlice(builder, loc, input(), offsets, sizes, strides));

SmallVector<Type, 4> resultTypes;
- if (hasTensorSemantics()) {
+ if (hasPureTensorSemantics()) {
tiledOperands.emplace_back(
getSlice(builder, loc, output(), mirrorOffsets, sizes, strides));
resultTypes.push_back(tiledOperands[1].getType());
@@ -1462,7 +1462,7 @@ TopkOp::getTiledImplementation(OpBuilder &builder,
tiledOperands.emplace_back(
getSlice(builder, loc, getOutputs()[1], offsets, outputSizes, strides));
SmallVector<Type, 2> resultTypes;
- if (hasTensorSemantics()) {
+ if (hasPureTensorSemantics()) {
resultTypes.push_back(tiledOperands[tiledOperands.size() - 2].getType());
resultTypes.push_back(tiledOperands[tiledOperands.size() - 1].getType());
}
@@ -2224,7 +2224,7 @@ WinogradInputTransformOp::getTiledImplementation(OpBuilder &builder,
outputSizes, outputStrides));

SmallVector<Type, 4> resultTypes;
- if (hasTensorSemantics()) {
+ if (hasPureTensorSemantics()) {
resultTypes.push_back(tiledOperands[1].getType());
}

@@ -2396,7 +2396,7 @@ FailureOr<TilingResult> WinogradOutputTransformOp::getTiledImplementation(
outputSizes, outputStrides));

SmallVector<Type, 4> resultTypes;
- if (hasTensorSemantics()) {
+ if (hasPureTensorSemantics()) {
resultTypes.push_back(tiledOperands[1].getType());
}

@@ -2583,7 +2583,7 @@ AttentionOp::getTiledImplementation(OpBuilder &builder,
queryOutputStrides));

SmallVector<Type> resultTypes;
- if (hasTensorSemantics())
+ if (hasPureTensorSemantics())
resultTypes.push_back(tiledOperands[3].getType());

Operation *tiledOp =
2 changes: 1 addition & 1 deletion third_party/llvm-project
