From 40b607db77f882922786d4e90001d18de509ca81 Mon Sep 17 00:00:00 2001
From: Alexandre Eichenberger
Date: Wed, 25 Sep 2024 08:40:29 -0400
Subject: [PATCH] Cleanup of krnl iterate loops (#2953)

Signed-off-by: Alexandre Eichenberger
Co-authored-by: Tung D. Le
---
 docs/LoweringCode.md                          | 10 ++---
 .../Conversion/ZHighToZLow/ZHighToZLow.cpp    | 23 +++++------
 .../Transform/ZLow/ZLowStickExpansion.cpp     | 14 +++----
 .../KrnlToAffine/KrnlCopyFromBuffer.cpp       |  4 +-
 .../KrnlToAffine/KrnlCopyToBuffer.cpp         |  6 +--
 src/Conversion/KrnlToAffine/KrnlMatmul.cpp    | 38 ++++++++++---------
 src/Conversion/KrnlToAffine/KrnlMemset.cpp    |  2 +-
 .../KrnlToLLVM/ConvertKrnlToLLVM.cpp          |  8 ++--
 src/Conversion/KrnlToLLVM/KrnlEntryPoint.cpp  | 12 +++---
 .../ONNXToKrnl/Additional/LayoutTransform.cpp | 10 ++---
 .../ONNXToKrnl/Additional/ShapeTransform.cpp  |  2 +-
 .../ONNXToKrnl/ControlFlow/Loop.cpp           |  6 +--
 .../ONNXToKrnl/ControlFlow/Scan.cpp           |  4 +-
 .../ONNXToKrnl/ML/CategoryMapper.cpp          |  4 +-
 src/Conversion/ONNXToKrnl/Math/CumSum.cpp     | 11 +++---
 .../ONNXToKrnl/Math/Elementwise.cpp           | 12 +++---
 src/Conversion/ONNXToKrnl/Math/Gemm.cpp       | 22 ++++++-----
 src/Conversion/ONNXToKrnl/Math/Hardmax.cpp    | 10 ++---
 src/Conversion/ONNXToKrnl/Math/LRN.cpp        |  2 +-
 src/Conversion/ONNXToKrnl/Math/MatMul.cpp     | 11 +++---
 src/Conversion/ONNXToKrnl/Math/Reduction.cpp  | 25 ++++++------
 src/Conversion/ONNXToKrnl/Math/Softmax.cpp    | 12 +++---
 src/Conversion/ONNXToKrnl/Math/TopK.cpp       |  2 +-
 src/Conversion/ONNXToKrnl/Math/Trilu.cpp      |  2 +-
 src/Conversion/ONNXToKrnl/NN/Conv.cpp         |  6 +--
 .../ONNXToKrnl/NN/Normalization.cpp           | 31 +++++++--------
 src/Conversion/ONNXToKrnl/NN/Pooling.cpp      |  2 +-
 .../ONNXToKrnl/ONNXToKrnlCommon.cpp           | 11 +++---
 .../ObjectDetection/NonMaxSuppression.cpp     | 14 +++----
 .../Quantization/QuantizeLinear.cpp           |  2 +-
 src/Conversion/ONNXToKrnl/RNN/GRU.cpp         |  6 +--
 src/Conversion/ONNXToKrnl/RNN/LSTM.cpp        |  2 +-
 src/Conversion/ONNXToKrnl/RNN/RNN.cpp         |  2 +-
 src/Conversion/ONNXToKrnl/RNN/RNNBase.cpp     | 17 +++++----
 src/Conversion/ONNXToKrnl/RNN/RNNBase.hpp     | 34 ++++++++---------
 .../ONNXToKrnl/Sequence/SequenceErase.cpp     | 18 ++-------
 .../ONNXToKrnl/Sequence/SequenceInsert.cpp    | 18 ++-------
 .../ONNXToKrnl/Tensor/ArgMinMax.cpp           |  4 +-
 src/Conversion/ONNXToKrnl/Tensor/Compress.cpp | 24 ++++++------
 src/Conversion/ONNXToKrnl/Tensor/Concat.cpp   |  2 +-
 .../Tensor/ConcatShapeTranspose.cpp           |  2 +-
 .../ONNXToKrnl/Tensor/ConstantOfShape.cpp     |  2 +-
 src/Conversion/ONNXToKrnl/Tensor/Expand.cpp   |  2 +-
 src/Conversion/ONNXToKrnl/Tensor/Gather.cpp   |  2 +-
 .../ONNXToKrnl/Tensor/GatherElements.cpp      |  2 +-
 src/Conversion/ONNXToKrnl/Tensor/GatherND.cpp |  6 +--
 src/Conversion/ONNXToKrnl/Tensor/NonZero.cpp  |  8 ++--
 src/Conversion/ONNXToKrnl/Tensor/OneHot.cpp   |  8 ++--
 src/Conversion/ONNXToKrnl/Tensor/Pad.cpp      |  4 +-
 src/Conversion/ONNXToKrnl/Tensor/Range.cpp    |  2 +-
 src/Conversion/ONNXToKrnl/Tensor/Resize.cpp   |  4 +-
 .../ONNXToKrnl/Tensor/ReverseSequence.cpp     |  2 +-
 .../ONNXToKrnl/Tensor/ScatterElements.cpp     |  2 +-
 .../ONNXToKrnl/Tensor/ScatterND.cpp           |  2 +-
 src/Conversion/ONNXToKrnl/Tensor/Slice.cpp    |  2 +-
 src/Conversion/ONNXToKrnl/Tensor/Split.cpp    |  2 +-
 src/Conversion/ONNXToKrnl/Tensor/Tile.cpp     |  2 +-
 .../ONNXToKrnl/Tensor/Transpose.cpp           |  4 +-
 src/Dialect/Krnl/DialectBuilder.cpp           | 26 ++++++-------
 src/Dialect/Krnl/DialectBuilder.hpp           | 18 +++++----
 src/Dialect/Mlir/DialectBuilder.cpp           |  5 +--
 src/Dialect/Mlir/DialectBuilder.hpp           | 27 ++++++-------
 src/Dialect/Mlir/DialectBuilder.hpp.inc       | 20 +++++-----
 src/Dialect/Mlir/IndexExpr.cpp                |  4 +-
 src/Dialect/Mlir/IndexExpr.hpp                |  4 +-
 65 files changed, 291 insertions(+), 316 deletions(-)

diff --git a/docs/LoweringCode.md b/docs/LoweringCode.md
index 34d6523e8b..db0b154092 100644
--- a/docs/LoweringCode.md
+++ b/docs/LoweringCode.md
@@ -105,7 +105,7 @@
 struct KrnlBuilder : public DialectBuilder {
   void iterate(ValueRange originalLoops, ValueRange optimizedLoops,
       ValueRange lbs, ValueRange ubs,
-      function_ref<void(KrnlBuilder &createKrnl, ValueRange indices)>
+      function_ref<void(const KrnlBuilder &createKrnl, ValueRange indices)>
           bodyBuilderFn);
 };
 ```
@@ -128,7 +128,7 @@
 ValueRange loopDef = createKrnl.defineLoops(2);
 // Create the loop.
 createKrnl.iterate(loopDef, loopDef, {zero, zero}, {ub0, ub1},
-    [&](KrnlBuilder &createKrnl, ValueRange loopInd){
+    [&](const KrnlBuilder &createKrnl, ValueRange loopInd){
       // Loop body.
       createKrnl.store(zero, array, loopInd);
     });
@@ -183,7 +183,7 @@
 ValueRange loopBlockDef = createKrnl.block(loopDef, 4);
 createKrnl.permute({loopBlockDef[0], loopBlockDef[1], {0,1});
 // Create the loop iterating over the blocks.
 createKrnl.iterate(loopDef, {loopBlockDef[0], loopBlockDef[0]}, {zero}, {ub0},
-    [&](KrnlBuilder &createKrnl, ValueRange blockLoopInd){
+    [&](const KrnlBuilder &createKrnl, ValueRange blockLoopInd){
       // Loop body.
       createKrnl.store(zero, array, loopInd);
     });
@@ -209,10 +209,10 @@
 We now consider tiling our original 2-dimensional example below.
 // Create the loop iterating over the blocks.
 createKrnl.iterate(loopDef, {outerLoopBlockDef[0], innerLoopBlockDef[0]},
     {zero, zero}, {ub0, ub1},
-    [&](KrnlBuilder &createKrnl, ValueRange blockLoopInd){
+    [&](const KrnlBuilder &createKrnl, ValueRange blockLoopInd){
       // Create the loop iterating inside the blocks.
       createKrnl.iterate({}, {outerLoopBlockDef[1], innerLoopBlockDef[1]},
-          {}, {}, [&](KrnlBuilder &createKrnl, ValueRange loopInd) {
+          {}, {}, [&](const KrnlBuilder &createKrnl, ValueRange loopInd) {
             // Loop body.
             createKrnl.store(zero, array, loopInd);
           });
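The docs change above captures the convention applied throughout this patch: loop-body callbacks receive the builder as `const KrnlBuilder &`. A minimal sketch of the updated calling convention (assuming a `KrnlBuilder createKrnl` positioned at a valid insertion point; `zeroIdx`, `ub0`, `ub1`, `array`, and `zeroVal` are placeholder values in the spirit of the documentation's own examples):

```cpp
// Sketch only: zeroIdx/ub0/ub1/array/zeroVal are assumed to be in scope.
ValueRange loopDef = createKrnl.defineLoops(2);
createKrnl.iterate(loopDef, loopDef, {zeroIdx, zeroIdx}, {ub0, ub1},
    [&](const KrnlBuilder &kb, ValueRange loopInd) {
      // The callback's builder is const: derived builders such as
      // MathBuilder are constructed from it rather than mutating it.
      MathBuilder createMath(kb);
      kb.store(zeroVal, array, loopInd);
    });
```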
diff --git a/src/Accelerators/NNPA/Conversion/ZHighToZLow/ZHighToZLow.cpp b/src/Accelerators/NNPA/Conversion/ZHighToZLow/ZHighToZLow.cpp
index 0e56726668..adff6073e9 100644
--- a/src/Accelerators/NNPA/Conversion/ZHighToZLow/ZHighToZLow.cpp
+++ b/src/Accelerators/NNPA/Conversion/ZHighToZLow/ZHighToZLow.cpp
@@ -1315,7 +1315,7 @@ struct ZHighToZLowFixGRUYOpLowering : public ConversionPattern {
     Value iZero = create.math.constantIndex(0);
     ValueRange batchLoop = create.krnl.defineLoops(1);
     create.krnl.iterate(batchLoop, batchLoop, {iZero}, {create.mem.dim(Y, 2)},
-        [&](KrnlBuilder &createKrnl, ValueRange batchIndices) {
+        [&](const KrnlBuilder &createKrnl, ValueRange batchIndices) {
           MathBuilder createMath(createKrnl);
           IndexExprScope ieScope(createKrnl);
           Value bs = batchIndices[0];
@@ -1338,7 +1338,7 @@ struct ZHighToZLowFixGRUYOpLowering : public ConversionPattern {
       rewriter.setInsertionPointToStart(&regionOp.getBodyRegion().front());
       ValueRange loops = create.krnl.defineLoops(yRank - 1);
       create.krnl.iterate(loops, loops, yLbs, yUbs,
-          [&](KrnlBuilder &createKrnl, ValueRange indices) {
+          [&](const KrnlBuilder &createKrnl, ValueRange indices) {
             Value sequenceIV(indices[0]);
             Value directionIV(indices[1]);
             Value hs(indices[2]);
@@ -1366,7 +1366,7 @@
     ValueRange loops = create.krnl.defineLoops(yRank);
     create.krnl.iterate(loops, loops, yLbs, yUbs,
-        [&](KrnlBuilder &createKrnl, ValueRange indices) {
+        [&](const KrnlBuilder &createKrnl, ValueRange indices) {
           MathBuilder createMath(createKrnl);
           IndexExprScope ieScope(createKrnl);
           Value sequenceIV(indices[0]);
@@ -1435,7 +1435,7 @@ struct ZHighToZLowFixGRUYhOpLowering : public ConversionPattern {
     Value seqSize = create.mem.dim(Y, 0);
     ValueRange loops = create.krnl.defineLoops(htRank);
     create.krnl.iterate(loops, loops, htLbs, htUbs,
-        [&](KrnlBuilder &createKrnl, ValueRange indices) {
+        [&](const KrnlBuilder &createKrnl, ValueRange indices) {
           MathBuilder createMath(createKrnl);
           IndexExprScope ieScope(createKrnl);
           Value bs(indices[1]), hs(indices[2]);
@@ -1612,7 +1612,7 @@ struct ZHighToZLowStickifiedConstantOfShapeOpLowering
     SmallVector lbs(rank, LitIE(0));
     SmallVector ubs = shapeHelper.getOutputDims();
     create.krnl.iterateIE(loopDef, loopDef, lbs, ubs,
-        [&](KrnlBuilder &createKrnl, ValueRange indices) {
+        [&](const KrnlBuilder &createKrnl, ValueRange indices) {
           // Keep this load inside the loop to tweak LLVM.
           Value valueF16 = createKrnl.load(memrefF16);
           createKrnl.store(valueF16, res, indices);
@@ -1701,13 +1701,10 @@ struct ZHighToZLowDataConversionLowering
     SmallVector flattenedOutputDims;
     Value flatOutput = create.mem.reshapeToFlatInnermost(
         alloc, outputDims, flattenedOutputDims, collapsedInnermostLoops);
-    DimsExpr lbs(1, LitIE(0));

     // Create loop iteration (flattened to 1D) and block it by totVL.
-    ValueRange loopDef = create.krnl.defineLoops(1);
-    ValueRange blockedLoopDef = create.krnl.block(loopDef[0], totVL);
-    SmallVector optimizedLoopDef(1, blockedLoopDef[0]);
-
+    DimsExpr lbs = {LitIE(0)};
+    bool useParallel = false;
     if (enableParallel) {
       int64_t parId;
       int64_t tripCount = flattenedOutputDims[0].isLiteral()
@@ -1716,7 +1713,7 @@ struct ZHighToZLowDataConversionLowering
                               : -1;
       if (findSuitableParallelDimension(lbs, flattenedOutputDims, 0, 1, parId,
               /*min iter for going parallel*/ 1024)) {
-        create.krnl.parallel(blockedLoopDef[0]);
+        useParallel = true;
         onnxToKrnlParallelReport(op, /*successful*/ true, 0, tripCount,
             "dlf16-f32 conversion fully parallelized");
       } else {
@@ -1729,8 +1726,8 @@ struct ZHighToZLowDataConversionLowering
             : -1,
         "dlf16-f32 conversion fully flattened");

-    create.krnl.iterateIE(loopDef, optimizedLoopDef, lbs, flattenedOutputDims,
-        [&](KrnlBuilder &b, ValueRange loopInd) {
+    create.krnl.forLoopIE(lbs[0], flattenedOutputDims[0], totVL, useParallel,
+        [&](const KrnlBuilder &b, ValueRange loopInd) {
           MDBuilder create(b);
           // Manually unrolled loop, add archVL offset at each iterations.
           for (int64_t u = 0; u < unrollVL; ++u) {
diff --git a/src/Accelerators/NNPA/Transform/ZLow/ZLowStickExpansion.cpp b/src/Accelerators/NNPA/Transform/ZLow/ZLowStickExpansion.cpp
index 9c0b66b432..12d1cf0fbc 100644
--- a/src/Accelerators/NNPA/Transform/ZLow/ZLowStickExpansion.cpp
+++ b/src/Accelerators/NNPA/Transform/ZLow/ZLowStickExpansion.cpp
@@ -149,8 +149,8 @@ class UnstickExpansionPattern : public OpRewritePattern {
         create.mem.reinterpretCast(input, litZero.getValue(), reallocTileDims);

     // Outer loop (E4, E3, E2, E1 iterates over tiles of 64 elements)
-    create.krnl.iterateIE(
-        loopDefs, loopDefs, lbs, ubs, [&](KrnlBuilder &b, ValueRange loopInd) {
+    create.krnl.iterateIE(loopDefs, loopDefs, lbs, ubs,
+        [&](const KrnlBuilder &b, ValueRange loopInd) {
           MDBuilder create(b);
           IndexExprScope outerScope(create.krnl, &allocScope);
           DimsExpr outerIndices = DimListIE(loopInd);
@@ -192,14 +192,14 @@ class UnstickExpansionPattern : public OpRewritePattern {
           // Condition
           isFullLogical.getValue(),
           // Then (is full).
-          [&](SCFBuilder b) {
+          [&](const SCFBuilder b) {
             MDBuilder create(b);
             // Loop (tried unroll of 2 and 8, 4 was best).
             const int64_t unrollVL = 4;
             const int64_t totVL = unrollVL * archVL;
             assert(totVL <= 64 && "bad unroll");
             create.scf.forLoop(litZero.getValue(), lit64.getValue(), totVL,
-                [&](SCFBuilder b, ValueRange loopInd) {
+                [&](const SCFBuilder b, ValueRange loopInd) {
                   MDBuilder create(b);
                   IndexExprScope innerScope(b, &outerScope);
                   Value loopIndex = loopInd[0];
@@ -430,8 +430,8 @@ class StickExpansionPattern : public OpRewritePattern {
         create.mem.reinterpretCast(alloc, litZero.getValue(), reallocTileDims);

     // Outer loop (E1 iterates over tiles of 64 elements).
-    create.krnl.iterateIE(
-        loopDefs, loopDefs, lbs, ubs, [&](KrnlBuilder &b, ValueRange loopInd) {
+    create.krnl.iterateIE(loopDefs, loopDefs, lbs, ubs,
+        [&](const KrnlBuilder &b, ValueRange loopInd) {
           MDBuilder create(b);
           IndexExprScope outerScope(create.krnl, &allocScope);
           DimsExpr outerIndices;
@@ -458,7 +458,7 @@ class StickExpansionPattern : public OpRewritePattern {
 #endif

       create.affine.forLoopIE(litZero, simdLoopUB, totVL,
-          [&](AffineBuilder &b, ValueRange loopInd) {
+          [&](const AffineBuilder &b, ValueRange loopInd) {
             MDBuilder create(b);
             DimsExpr inputAF;
             IndexExprScope innerScope(create.krnl, &outerScope);
diff --git a/src/Conversion/KrnlToAffine/KrnlCopyFromBuffer.cpp b/src/Conversion/KrnlToAffine/KrnlCopyFromBuffer.cpp
index ff806f1d3a..5b5f761a35 100644
--- a/src/Conversion/KrnlToAffine/KrnlCopyFromBuffer.cpp
+++ b/src/Conversion/KrnlToAffine/KrnlCopyFromBuffer.cpp
@@ -90,7 +90,7 @@ class KrnlCopyFromBufferLowering : public ConversionPattern {
     return success();
   }

-  void genCopyLoops(AffineBuilderKrnlMem &createAffine,
+  void genCopyLoops(const AffineBuilderKrnlMem &createAffine,
       IndexExprScope *enclosingScope, Value buffMemref, Value destMemref,
       IndexExpr zeroIE, SmallVectorImpl &starts,
       SmallVectorImpl &writeUBs, SmallVectorImpl &loopIndices,
@@ -125,7 +125,7 @@ class KrnlCopyFromBufferLowering : public ConversionPattern {
       } else {
         // Loop to copy the data.
         createAffine.forLoopIE(zeroIE, writeUBs[i], 1, false /*parallel*/,
-            [&](AffineBuilderKrnlMem &createAffine, ValueRange loopInd) {
+            [&](const AffineBuilderKrnlMem &createAffine, ValueRange loopInd) {
              loopIndices.emplace_back(loopInd[0]);
              genCopyLoops(createAffine, enclosingScope, buffMemref, destMemref,
                  zeroIE, starts, writeUBs, loopIndices, i + 1, buffRank);
diff --git a/src/Conversion/KrnlToAffine/KrnlCopyToBuffer.cpp b/src/Conversion/KrnlToAffine/KrnlCopyToBuffer.cpp
index 9ddda7e86d..7e6bbf20d2 100644
--- a/src/Conversion/KrnlToAffine/KrnlCopyToBuffer.cpp
+++ b/src/Conversion/KrnlToAffine/KrnlCopyToBuffer.cpp
@@ -129,7 +129,7 @@ class KrnlCopyToBufferLowering : public ConversionPattern {
     return success();
   }

-  void genCopyLoops(AffineBuilderKrnlMem &createAffine,
+  void genCopyLoops(const AffineBuilderKrnlMem &createAffine,
       IndexExprScope *enclosingScope, Value buffMemref, Value sourceMemref,
       SmallVectorImpl &srcLoopMap, Value padVal, IndexExpr zeroIE,
       SmallVectorImpl &starts, SmallVectorImpl &readUBs,
@@ -169,7 +169,7 @@ class KrnlCopyToBufferLowering : public ConversionPattern {
         // Nothing to read, skip.
       } else {
         createAffine.forLoopIE(zeroIE, readUBs[i], 1,
-            [&](AffineBuilderKrnlMem &createAffine, ValueRange loopInd) {
+            [&](const AffineBuilderKrnlMem &createAffine, ValueRange loopInd) {
              loopIndices.emplace_back(loopInd[0]);
              genCopyLoops(createAffine, enclosingScope, buffMemref,
                  sourceMemref, srcLoopMap, padVal, zeroIE, starts, readUBs,
@@ -182,7 +182,7 @@ class KrnlCopyToBufferLowering : public ConversionPattern {
         // No padding needed.
       } else {
         createAffine.forLoopIE(readUBs[i], padUBs[i], 1,
-            [&](AffineBuilderKrnlMem &createAffine, ValueRange loopInd) {
+            [&](const AffineBuilderKrnlMem &createAffine, ValueRange loopInd) {
              loopIndices.emplace_back(loopInd[0]);
              genCopyLoops(createAffine, enclosingScope, buffMemref,
                  sourceMemref, srcLoopMap, padVal, zeroIE, starts, readUBs,
diff --git a/src/Conversion/KrnlToAffine/KrnlMatmul.cpp b/src/Conversion/KrnlToAffine/KrnlMatmul.cpp
index 6aeceffefa..f5d13bb49e 100644
--- a/src/Conversion/KrnlToAffine/KrnlMatmul.cpp
+++ b/src/Conversion/KrnlToAffine/KrnlMatmul.cpp
@@ -223,11 +223,11 @@ class KrnlMatmulLowering : public ConversionPattern {
     if (matVectorProduct) {
       // clang-format off
       create.affineKMem.ifThenElseIE(indexScope, allFullTiles,
-        /* then full tiles */ [&](AffineBuilderKrnlMem &createAffine) {
+        /* then full tiles */ [&](const AffineBuilderKrnlMem &createAffine) {
         genSimdMatVect(createAffine, matmulOp, elementType, aStart, bStart,
             cStart, iComputeTileSize, jComputeTileSize, kComputeTileSize,
             vectorLen, fullUnrollAndJam);
-      }, /* else has partial tiles */ [&](AffineBuilderKrnlMem &createAffine) {
+      }, /* else has partial tiles */ [&](const AffineBuilderKrnlMem &createAffine) {
         genScalar(createAffine, matmulOp, elementType, aStart, bStart, cStart,
             iTrip, jTrip, kTrip, /*unroll*/ false);
       });
@@ -235,18 +235,20 @@ class KrnlMatmulLowering : public ConversionPattern {
     } else {
       // clang-format off
       create.affineKMem.ifThenElseIE(indexScope, allFullTiles,
-        /* then full tiles */ [&](AffineBuilderKrnlMem &createAffine) {
+        /* then full tiles */ [&](const AffineBuilderKrnlMem &createAffine) {
         genSimdMatMat(createAffine, matmulOp, elementType, aStart, bStart,
             cStart, iComputeTileSize, jComputeTileSize, kComputeTileSize,
             vectorLen, fullUnrollAndJam);
-      }, /* has some partial tiles */ [&](AffineBuilderKrnlMem &createAffine) {
+      },
+        /* Else has some partial tiles */
+        [&](const AffineBuilderKrnlMem &createAffine) {
         // Trip regardless of full/partial for N & K
         // Test if SIMD dim (M) is full.
         createAffine.ifThenElseIE(indexScope, jFullTiles,
-          /* full SIMD */ [&](AffineBuilderKrnlMem &createAffine) {
+          /* full SIMD */ [&](const AffineBuilderKrnlMem &createAffine) {
           genSimdMatMat(createAffine, matmulOp, elementType, aStart, bStart,
               cStart, iTrip, jComputeTileSize, kTrip, vectorLen,
               /*unroll*/ false);
-        }, /* else partial SIMD */ [&](AffineBuilderKrnlMem &createAffine) {
+        }, /* else partial SIMD */ [&](const AffineBuilderKrnlMem &createAffine) {
           // TODO: evaluate if get performance from partial SIMD
           if (false && jPartialTrip.isLiteral() && jPartialTrip.getLiteral() >=2) {
             // has a known trip count along the simd dimension of at least 2
@@ -265,11 +267,11 @@ class KrnlMatmulLowering : public ConversionPattern {
       // Scalar code generator.
       // clang-format off
       create.affineKMem.ifThenElseIE(indexScope, allFullTiles,
-        /* then full */ [&](AffineBuilderKrnlMem &createAffine) {
+        /* then full */ [&](const AffineBuilderKrnlMem &createAffine) {
         genScalar(createAffine, matmulOp, elementType, aStart, bStart, cStart,
             iComputeTileSize, jComputeTileSize, kComputeTileSize,
             fullUnrollAndJam);
-      }, /* else partial */ [&](AffineBuilderKrnlMem &createAffine) {
+      }, /* else partial */ [&](const AffineBuilderKrnlMem &createAffine) {
         genScalar(createAffine, matmulOp, elementType, aStart, bStart, cStart,
             iTrip, jTrip, kTrip, false);
       });
@@ -280,7 +282,7 @@ class KrnlMatmulLowering : public ConversionPattern {
   }

 private:
-  void genScalar(AffineBuilderKrnlMem &createAffine, KrnlMatMulOp op,
+  void genScalar(const AffineBuilderKrnlMem &createAffine, KrnlMatMulOp op,
       Type elementType, ArrayRef aStart, ArrayRef bStart,
       ArrayRef cStart, IndexExpr I, IndexExpr J, IndexExpr K,
       bool unrollJam) const {
@@ -300,10 +302,11 @@ class KrnlMatmulLowering : public ConversionPattern {
     LiteralIndexExpr zeroIE(0);
     Value jSaved;
     createAffine.forLoopIE(zeroIE, I, 1,
-        [&](AffineBuilderKrnlMem &createAffine, ValueRange loopInd) {
+        [&](const AffineBuilderKrnlMem &createAffine, ValueRange loopInd) {
           Value i = loopInd[0];
           createAffine.forLoopIE(zeroIE, J, 1,
-              [&](AffineBuilderKrnlMem &createAffine, ValueRange loopInd) {
+              [&](const AffineBuilderKrnlMem &createAffine,
+                  ValueRange loopInd) {
                 MathBuilder createMath(createAffine);
                 Value j = loopInd[0];
                 // Defines induction variables, and possibly initialize C.
@@ -315,7 +318,7 @@ class KrnlMatmulLowering : public ConversionPattern {
                 createAffine.store(initVal, TmpC, tmpCAccess);
                 // Sum over k.
                 createAffine.forLoopIE(zeroIE, K, 1,
-                    [&](AffineBuilderKrnlMem &createAffine,
+                    [&](const AffineBuilderKrnlMem &createAffine,
                         ValueRange loopInd) {
                       MathBuilder createMath(createAffine);
                       Value k = loopInd[0];
@@ -340,7 +343,7 @@ class KrnlMatmulLowering : public ConversionPattern {
   }

   // Initially, simdize with full K vector length.
-  void genSimdMatVect(AffineBuilderKrnlMem &createAffine, KrnlMatMulOp op,
+  void genSimdMatVect(const AffineBuilderKrnlMem &createAffine, KrnlMatMulOp op,
       Type elementType, ArrayRef aStart, ArrayRef bStart,
       ArrayRef cStart, IndexExpr I, IndexExpr J, IndexExpr K,
       IndexExpr vectorLen, bool unrollJam) const {
@@ -384,7 +387,7 @@ class KrnlMatmulLowering : public ConversionPattern {
     Value iZero = create.math.constantIndex(0);

     create.affineKMem.forLoopIE(zeroIE, K, VL,
-        [&](AffineBuilderKrnlMem &createAffine, ValueRange loopInd) {
+        [&](const AffineBuilderKrnlMem &createAffine, ValueRange loopInd) {
           MultiDialectBuilder create(createAffine);
           Value k = loopInd[0];
           // Iterates over the I indices (K is SIMD dim).
@@ -431,7 +434,7 @@ class KrnlMatmulLowering : public ConversionPattern {
   }

   // Simdize along J / memory rows in B and C.
-  void genSimdMatMat(AffineBuilderKrnlMem &createAffine, KrnlMatMulOp op,
+  void genSimdMatMat(const AffineBuilderKrnlMem &createAffine, KrnlMatMulOp op,
       Type elementType, ArrayRef aStart, ArrayRef bStart,
       ArrayRef cStart, IndexExpr I, IndexExpr J, IndexExpr K,
       IndexExpr vectorLen, bool unrollJam) const {
@@ -466,7 +469,7 @@ class KrnlMatmulLowering : public ConversionPattern {
     Value iZero = create.math.constantIndex(0);

     createAffine.forLoopIE(zeroIE, I, 1,
-        [&](AffineBuilderKrnlMem &createAffine, ValueRange loopInd) {
+        [&](const AffineBuilderKrnlMem &createAffine, ValueRange loopInd) {
           MultiDialectBuilder create(createAffine);
           Value i = loopInd[0];
           iSaved = i; // Saved for unroll and jam.
@@ -476,7 +479,8 @@ class KrnlMatmulLowering : public ConversionPattern {
           createAffine.store(initVal, TmpC, tmpCAccess);
           // Sum over k.
           createAffine.forLoopIE(zeroIE, K, 1,
-              [&](AffineBuilderKrnlMem &createAffine, ValueRange loopInd) {
+              [&](const AffineBuilderKrnlMem &createAffine,
+                  ValueRange loopInd) {
                 MultiDialectBuilder create(
                     createAffine);
                 Value k = loopInd[0];
diff --git a/src/Conversion/KrnlToAffine/KrnlMemset.cpp b/src/Conversion/KrnlToAffine/KrnlMemset.cpp
index ddfa21089e..0a1f145e91 100644
--- a/src/Conversion/KrnlToAffine/KrnlMemset.cpp
+++ b/src/Conversion/KrnlToAffine/KrnlMemset.cpp
@@ -59,7 +59,7 @@ class KrnlMemsetLowering : public ConversionPattern {
     SmallVector steps(rank, 1);
     // Copy data,
     create.affineKMem.forLoopsIE(lbs, ubs, steps,
-        [&](AffineBuilderKrnlMem &createAffine, ValueRange indices) {
+        [&](const AffineBuilderKrnlMem &createAffine, ValueRange indices) {
           createAffine.store(destVal, destMemRef, indices);
         });
     rewriter.eraseOp(op);
diff --git a/src/Conversion/KrnlToLLVM/ConvertKrnlToLLVM.cpp b/src/Conversion/KrnlToLLVM/ConvertKrnlToLLVM.cpp
index 9241ee7594..62db84beff 100644
--- a/src/Conversion/KrnlToLLVM/ConvertKrnlToLLVM.cpp
+++ b/src/Conversion/KrnlToLLVM/ConvertKrnlToLLVM.cpp
@@ -367,12 +367,12 @@ void genSignatureFunction(ModuleOp &module,
     // If the argument is not NULL, update its value to return the number of
     // entry points.
     create.llvm.ifThenElse(/*cond=*/
-        [&](LLVMBuilder &createLLVM) {
+        [&](const LLVMBuilder &createLLVM) {
           Value nullPtr = createLLVM.null(i64PtrTy);
           return createLLVM.icmp(
               LLVM::ICmpPredicate::ne, numOfEntryPoints, nullPtr);
         }, /*then=*/
-        [&](LLVMBuilder &createLLVM) {
+        [&](const LLVMBuilder &createLLVM) {
           Value numOfEntryPointsPtr = createLLVM.getElemPtr(
               i64PtrTy, i64Type, numOfEntryPoints, ArrayRef{0});
           Value noep =
@@ -420,7 +420,7 @@ void genSignatureFunction(ModuleOp &module,

       // Return the signature if found.
       create.llvm.ifThenElse(/*cond=*/
-          [&](LLVMBuilder &createLLVM) {
+          [&](const LLVMBuilder &createLLVM) {
             // Read an entry point name.
             Value entryI8Ptr =
                 krnl::getPtrToGlobalString(globalEntryPoint, loc, b);
@@ -434,7 +434,7 @@ void genSignatureFunction(ModuleOp &module,
             return createLLVM.icmp(
                 LLVM::ICmpPredicate::eq, strncmpResult, zeroI32);
           }, /*then=*/
-          [&](LLVMBuilder &createLLVM) {
+          [&](const LLVMBuilder &createLLVM) {
             Value sigAddr = createLLVM.addressOf(globalSignature);
             Value sigI8Ptr = createLLVM.bitcast(i8PtrTy, sigAddr);
             createLLVM._return(sigI8Ptr);
diff --git a/src/Conversion/KrnlToLLVM/KrnlEntryPoint.cpp b/src/Conversion/KrnlToLLVM/KrnlEntryPoint.cpp
index c2cc6125c8..0b1d3faf76 100644
--- a/src/Conversion/KrnlToLLVM/KrnlEntryPoint.cpp
+++ b/src/Conversion/KrnlToLLVM/KrnlEntryPoint.cpp
@@ -166,7 +166,7 @@ class KrnlEntryPointOpLowering : public OpRewritePattern {

     // Emit code for `if (OMInitCompatibleAccelX() == 0) then return NULL`.
     create.llvm.ifThenElse(/*cond=*/
-        [&](LLVMBuilder &createLLVM) {
+        [&](const LLVMBuilder &createLLVM) {
           // Call OMInitCompatibleAccelX.
           Value versionNumberVal = createLLVM.constant(
               int64Ty, static_cast<int64_t>(versionNumberInHex));
@@ -176,7 +176,7 @@ class KrnlEntryPointOpLowering : public OpRewritePattern {
           return createLLVM.icmp(
               LLVM::ICmpPredicate::eq, isCompatible, zeroI64);
         }, /*then=*/
-        [&](LLVMBuilder &createLLVM) {
+        [&](const LLVMBuilder &createLLVM) {
           // return NULL.
           createLLVM._return(createLLVM.null(getI8PointerType(context)));
         });
@@ -418,10 +418,10 @@ class KrnlEntryPointOpLowering : public OpRewritePattern {
     MLIRContext *context = rewriter.getContext();
     MultiDialectBuilder create(rewriter, loc);
     create.llvm.ifThenElse(/*cond=*/
-        [&](LLVMBuilder &createLLVM) {
+        [&](const LLVMBuilder &createLLVM) {
           return createLLVM.icmp(LLVM::ICmpPredicate::ne, lhs, rhs);
         }, /*then=*/
-        [&](LLVMBuilder &createLLVM) {
+        [&](const LLVMBuilder &createLLVM) {
           MultiDialectBuilder create(createLLVM);
           // Print an error message.
           if (appendRHS)
@@ -514,13 +514,13 @@ class KrnlEntryPointOpLowering : public OpRewritePattern {
     // In case that the reference dimension size is unknown, verify that
     // the actual dimension size is a non-negative value.
     create.llvm.ifThenElse(/*cond=*/
-        [&](LLVMBuilder &createLLVM) {
+        [&](const LLVMBuilder &createLLVM) {
           Value zero = createLLVM.constant(int64Ty, static_cast<int64_t>(d));
           return createLLVM.icmp(
               LLVM::ICmpPredicate::slt, actualDim, zero);
         }, /*then=*/
-        [&](LLVMBuilder &createLLVM) {
+        [&](const LLVMBuilder &createLLVM) {
           MultiDialectBuilder create(
               createLLVM);
           // Print an error message.
diff --git a/src/Conversion/ONNXToKrnl/Additional/LayoutTransform.cpp b/src/Conversion/ONNXToKrnl/Additional/LayoutTransform.cpp
index 90be6ad957..2657589e97 100644
--- a/src/Conversion/ONNXToKrnl/Additional/LayoutTransform.cpp
+++ b/src/Conversion/ONNXToKrnl/Additional/LayoutTransform.cpp
@@ -126,8 +126,8 @@ struct ONNXLayoutTransformOpLowering
       }

       // Outer loop (E1 iterates over tiles of 64 elements).
-      create.krnl.iterateIE(
-          loopDefs, loopDefs, lbs, ubs, [&](KrnlBuilder &b, ValueRange loopInd) {
+      create.krnl.iterateIE(loopDefs, loopDefs, lbs, ubs,
+          [&](const KrnlBuilder &b, ValueRange loopInd) {
            MDBuilder create(b);
            IndexExprScope outerScope(create.krnl);
            DimsExpr outerIndices;
@@ -162,13 +162,13 @@ struct ONNXLayoutTransformOpLowering
                 // Condition
                 isFullLogical.getValue(),
                 // Then (is full).
-                [&](SCFBuilder b) {
+                [&](const SCFBuilder b) {
                   MDBuilder create(b);
                   create.krnl.memcpy(
                       alloc, input, len, allocOffset, inputOffset);
                 },
                 // Else, we don't have a full tile.
-                [&](SCFBuilder b) {
+                [&](const SCFBuilder b) {
                   MDBuilder create(b);
                   IndexExprScope middleScope(b, &outerScope);
                   IndexExpr tripCount = SymIE(ub1) - SymIE(memAF[E1]);
@@ -263,7 +263,7 @@ struct ONNXLayoutTransformOpLowering
         }
       }
       create.krnl.iterateIE(loopDef, loopDef, lbs, ubs,
-          [&](KrnlBuilder &createKrnl, ValueRange indices) {
+          [&](const KrnlBuilder &createKrnl, ValueRange indices) {
             // Simply copy the input into the output.
             Value val = createKrnl.load(data, indices);
             createKrnl.store(val, alloc, indices);
diff --git a/src/Conversion/ONNXToKrnl/Additional/ShapeTransform.cpp b/src/Conversion/ONNXToKrnl/Additional/ShapeTransform.cpp
index 8b7b66863d..82f5212596 100644
--- a/src/Conversion/ONNXToKrnl/Additional/ShapeTransform.cpp
+++ b/src/Conversion/ONNXToKrnl/Additional/ShapeTransform.cpp
@@ -60,7 +60,7 @@ struct ONNXShapeTransformOpLowering : public ConversionPattern {
     create.krnlIE.getShapeAsDims(input, ubs);

     create.krnl.iterateIE(loopDef, loopDef, lbs, ubs,
-        [&](KrnlBuilder &createKrnl, ValueRange inputIndices) {
+        [&](const KrnlBuilder &createKrnl, ValueRange inputIndices) {
           Value loadedVal = createKrnl.load(input, inputIndices);
           // Compute output indices by using affine map.
           SmallVector outputIndices;
diff --git a/src/Conversion/ONNXToKrnl/ControlFlow/Loop.cpp b/src/Conversion/ONNXToKrnl/ControlFlow/Loop.cpp
index 1fd44c8090..5eddef0d66 100644
--- a/src/Conversion/ONNXToKrnl/ControlFlow/Loop.cpp
+++ b/src/Conversion/ONNXToKrnl/ControlFlow/Loop.cpp
@@ -100,7 +100,7 @@ struct ONNXLoopOpLowering : public OpConversionPattern {
     ValueRange loopDef = createKrnl.defineLoops(1);
     Value zero = create.math.constantIndex(0);
     createKrnl.iterate(loopDef, loopDef, {zero}, {maxTripCount},
-        [&](KrnlBuilder &createKrnl, ValueRange loopInd) {
+        [&](const KrnlBuilder &createKrnl, ValueRange loopInd) {
           OpBuilder::InsertionGuard insertGuard(rewriter);

           Value condReg = createKrnl.load(cond);
@@ -303,7 +303,7 @@ struct ONNXLoopOpLowering : public OpConversionPattern {
       KrnlBuilder createKrnl(rewriter, loc);
       ValueRange loopDef = createKrnl.defineLoops(1);
       createKrnl.iterate(loopDef, loopDef, {zero}, {maxTripCount},
-          [&](KrnlBuilder &createKrnl, ValueRange loopInd) {
+          [&](const KrnlBuilder &createKrnl, ValueRange loopInd) {
             // Wrap with KrnlRegionOp because emitCopy uses the result of
             // SeqExtract for loop bound.
             KrnlRegionOp regionOp = rewriter.create<KrnlRegionOp>(loc);
@@ -456,7 +456,7 @@ struct ONNXLoopOpLowering : public OpConversionPattern {
     SmallVector ubs;
     create.krnlIE.getShapeAsDims(src, ubs);
     create.krnl.iterateIE(loopDef, loopDef, lbs, ubs,
-        [&](KrnlBuilder &createKrnl, ValueRange loopInd) {
+        [&](const KrnlBuilder &createKrnl, ValueRange loopInd) {
           SmallVector writeIV(
               writePrefix.begin(), writePrefix.end());
           writeIV.insert(writeIV.end(), loopInd.begin(), loopInd.end());
diff --git a/src/Conversion/ONNXToKrnl/ControlFlow/Scan.cpp b/src/Conversion/ONNXToKrnl/ControlFlow/Scan.cpp
index 282e318d34..62ed56c63d 100644
--- a/src/Conversion/ONNXToKrnl/ControlFlow/Scan.cpp
+++ b/src/Conversion/ONNXToKrnl/ControlFlow/Scan.cpp
@@ -321,7 +321,7 @@ struct ONNXScanOpLowering : public OpConversionPattern {
     SmallVector ubs;
     create.krnlIE.getShapeAsDims(src, ubs);
     create.krnl.iterateIE(loopDef, loopDef, lbs, ubs,
-        [&](KrnlBuilder &createKrnl, ValueRange loopInd) {
+        [&](const KrnlBuilder &createKrnl, ValueRange loopInd) {
           SmallVector writeIV(
               writePrefix.begin(), writePrefix.end());
           writeIV.insert(writeIV.end(), loopInd.begin(), loopInd.end());
@@ -353,7 +353,7 @@ struct ONNXScanOpLowering : public OpConversionPattern {
     for (int i = readIV.size(); i < srcTy.getRank(); i++)
       ubs.emplace_back(create.krnlIE.getShapeAsDim(src, i));
     create.krnl.iterateIE(loopDef, loopDef, lbs, ubs,
-        [&](KrnlBuilder &createKrnl, ValueRange loopInd) {
+        [&](const KrnlBuilder &createKrnl, ValueRange loopInd) {
           readIV.insert(readIV.end(), loopInd.begin(), loopInd.end());
           Value val = createKrnl.load(src, readIV);
           createKrnl.store(val, dest, loopInd);
diff --git a/src/Conversion/ONNXToKrnl/ML/CategoryMapper.cpp b/src/Conversion/ONNXToKrnl/ML/CategoryMapper.cpp
index 5b6e561a78..f71e0e487c 100644
--- a/src/Conversion/ONNXToKrnl/ML/CategoryMapper.cpp
+++ b/src/Conversion/ONNXToKrnl/ML/CategoryMapper.cpp
@@ -144,7 +144,7 @@ struct ONNXCategoryMapperOpLowering

     ValueRange loopDef = create.krnl.defineLoops(rank);
     create.krnl.iterateIE(loopDef, loopDef, lbs, ubs,
-        [&](KrnlBuilder &createKrnl, ValueRange loopInd) {
+        [&](const KrnlBuilder &createKrnl, ValueRange loopInd) {
           // Determine the index of 'inputElem' in the perfect hash table
           // 'pHash'. Note: the index might not be valid (this happens
           // when the 'inputElem' is not present in the perfect hash
@@ -253,7 +253,7 @@ struct ONNXCategoryMapperOpLowering
   }

   Value loadElement(Value memref, ValueRange loopInd, Type elementType,
-      int64_t rank, KrnlBuilder &createKrnl) const {
+      int64_t rank, const KrnlBuilder &createKrnl) const {
     Value inputElem;
     TypeSwitch(elementType)
         .Case(
diff --git a/src/Conversion/ONNXToKrnl/Math/CumSum.cpp b/src/Conversion/ONNXToKrnl/Math/CumSum.cpp
index 4c999dbfb7..017d4f2b9f 100644
--- a/src/Conversion/ONNXToKrnl/Math/CumSum.cpp
+++ b/src/Conversion/ONNXToKrnl/Math/CumSum.cpp
@@ -159,7 +159,7 @@ struct ONNXCumSumOpLowering : public OpConversionPattern {
     // Initialize the temporary buffer: copy values from the input.
     ValueRange initLoopDef = create.krnl.defineLoops(rank);
     create.krnl.iterateIE(initLoopDef, initLoopDef, lbs, ubs,
-        [&](KrnlBuilder &ck, ValueRange initLoopInd) {
+        [&](const KrnlBuilder &ck, ValueRange initLoopInd) {
           MultiDialectBuilder create(ck);
           if (!exclusive) {
             Value x = create.krnl.load(X, initLoopInd);
@@ -190,9 +190,8 @@ struct ONNXCumSumOpLowering : public OpConversionPattern {
         });

     // Outer loop iterates over the number of steps.
-    ValueRange stepLoopDef = create.krnl.defineLoops(1);
-    create.krnl.iterateIE(stepLoopDef, stepLoopDef, {zeroIE}, {numberOfStep},
-        [&](KrnlBuilder &ck, ValueRange stepLoopInd) {
+    create.krnl.forLoopIE(zeroIE, numberOfStep, /*step*/ 1, /*par*/ false,
+        [&](const KrnlBuilder &ck, ValueRange stepLoopInd) {
           MultiDialectBuilder create(ck);

           // Compute index offset: offset = 2^step.
@@ -210,7 +209,7 @@ struct ONNXCumSumOpLowering : public OpConversionPattern {
           //   y[i,k] = buf[i,k]
           ValueRange sumLoopDef = create.krnl.defineLoops(rank);
           create.krnl.iterateIE(sumLoopDef, sumLoopDef, lbs, ubs,
-              [&](KrnlBuilder &ck, ValueRange sumLoopInd) {
+              [&](const KrnlBuilder &ck, ValueRange sumLoopInd) {
                 IndexExprScope ieScope(ck);
                 MultiDialectBuilder create(ck);
                 Value axis = axisIE.getValue();
@@ -231,7 +230,7 @@ struct ONNXCumSumOpLowering : public OpConversionPattern {
           //   buf = y
           ValueRange bufLoopDef = create.krnl.defineLoops(rank);
           create.krnl.iterateIE(bufLoopDef, bufLoopDef, lbs, ubs,
-              [&](KrnlBuilder &createKrnl, ValueRange bufLoopInd) {
+              [&](const KrnlBuilder &createKrnl, ValueRange bufLoopInd) {
                 Value x = createKrnl.load(resMemRef, bufLoopInd);
                 createKrnl.store(x, bufMemRef, bufLoopInd);
               });
diff --git a/src/Conversion/ONNXToKrnl/Math/Elementwise.cpp b/src/Conversion/ONNXToKrnl/Math/Elementwise.cpp
index 19bbcd9d86..8e6feaf540 100644
--- a/src/Conversion/ONNXToKrnl/Math/Elementwise.cpp
+++ b/src/Conversion/ONNXToKrnl/Math/Elementwise.cpp
@@ -1482,8 +1482,8 @@ static LogicalResult getPartiallyFlattenedSimdCode(
       }
     }
   }
-  create.krnl.iterateIE(
-      loopDef, loopDef, lbs, ubs, [&](KrnlBuilder &ck, ValueRange loopInd) {
+  create.krnl.iterateIE(loopDef, loopDef, lbs, ubs,
+      [&](const KrnlBuilder &ck, ValueRange loopInd) {
        MultiDialectBuilder create(ck);
        // LoopInd has the current indices for all but the innermost dim. Since
        // we expect here the entire innermost loop iteration in one go, the
@@ -2081,7 +2081,7 @@ struct ONNXElementwiseUnaryOpLowering
       }
     }
     create.krnl.iterateIE(loopDef, loopDef, lbs, ubs,
-        [&](KrnlBuilder &createKrnl, ValueRange loopInd) {
+        [&](const KrnlBuilder &createKrnl, ValueRange loopInd) {
           SmallVector args;
           Value loadedVal = createKrnl.load(X, loopInd);
           args.emplace_back(loadedVal);
@@ -2264,7 +2264,7 @@ struct ONNXElementwiseBinaryOpLowering
       }
     }
     create.krnl.iterateIE(loopDef, loopDef, lbs, ubs,
-        [&](KrnlBuilder &createKrnl, ValueRange loopInd) {
+        [&](const KrnlBuilder &createKrnl, ValueRange loopInd) {
           IndexExprScope innerScope(createKrnl, shapeHelper.getScope());
           SmallVector outputAccessExprs;
           getIndexExprList(loopInd, outputAccessExprs);
@@ -2440,7 +2440,7 @@ struct ONNXElementwiseVariadicOpLowering
       }
     }
     create.krnl.iterateIE(loopDef, loopDef, lbs, ubs,
-        [&](KrnlBuilder &createKrnl, ValueRange loopInd) {
+        [&](const KrnlBuilder &createKrnl, ValueRange loopInd) {
           IndexExprScope innerScope(createKrnl, shapeHelper.getScope());
           SmallVector outputAccessExprs;
           getIndexExprList(loopInd, outputAccessExprs);
@@ -2563,7 +2563,7 @@ struct ONNXWhereOpLowering : public ConversionPattern {
       }
     }
     create.krnl.iterateIE(loopDef, loopDef, lbs, ubs,
-        [&](KrnlBuilder &createKrnl, ValueRange loopInd) {
+        [&](const KrnlBuilder &createKrnl, ValueRange loopInd) {
           IndexExprScope innerScope(&rewriter, shapeHelper.getScope());
           SmallVector outputAccessExprs;
           getIndexExprList(loopInd, outputAccessExprs);
diff --git a/src/Conversion/ONNXToKrnl/Math/Gemm.cpp b/src/Conversion/ONNXToKrnl/Math/Gemm.cpp
index 686869f5f6..11d35f8f95 100644
--- a/src/Conversion/ONNXToKrnl/Math/Gemm.cpp
+++ b/src/Conversion/ONNXToKrnl/Math/Gemm.cpp
@@ -83,7 +83,7 @@ struct ONNXGemmOpLowering : public OpConversionPattern {
       }
     }
     create.krnl.iterateIE(loopDef, outerLoopDef, loopLbs, loopUbs,
-        [&](KrnlBuilder &createKrnl, ValueRange outerIndices) {
+        [&](const KrnlBuilder &createKrnl, ValueRange outerIndices) {
           MultiDialectBuilder create(
               createKrnl);
           // Create temp, single scalar, no need for default alignment.
@@ -92,7 +92,7 @@ struct ONNXGemmOpLowering : public OpConversionPattern {
           create.krnl.store(zeroVal, red);
           // Inner loop.
           create.krnl.iterate({}, innerLoopDef, {}, {},
-              [&](KrnlBuilder &createKrnl, ValueRange innerIndex) {
+              [&](const KrnlBuilder &createKrnl, ValueRange innerIndex) {
                 Value i(outerIndices[0]), j(outerIndices[1]), k(innerIndex[0]);
                 MultiDialectBuilder create(
                     createKrnl);
@@ -250,7 +250,8 @@ struct ONNXGemmOpLowering : public OpConversionPattern {
     }
     // Compute: A[i, k] * b[k, j] -> R[i, j])
     create.krnl.iterateIE({ii, jj, kk}, {ii1, jj1}, {zeroIE, zeroIE, zeroIE},
-        {I, J, K}, [&](KrnlBuilder &createKrnl, ValueRange i1_j1_indices) {
+        {I, J, K},
+        [&](const KrnlBuilder &createKrnl, ValueRange i1_j1_indices) {
          Value i1(i1_j1_indices[0]), j1(i1_j1_indices[1]);
          // If parallel, allocate on stack inside the parallel region.
          if (enableParallel) {
@@ -261,7 +262,7 @@ struct ONNXGemmOpLowering : public OpConversionPattern {
          }
          createKrnl.copyToBuffer(rBuff, R, {i1, j1}, zeroVal, false);
          createKrnl.iterateIE({}, {kk1}, {}, {},
-              [&](KrnlBuilder &createKrnl, ValueRange k1_index) {
+              [&](const KrnlBuilder &createKrnl, ValueRange k1_index) {
                Value k1(k1_index[0]);
                if (aTrans)
                  createKrnl.copyToBuffer(aBuff, A, {k1, i1}, zeroVal, true);
@@ -272,7 +273,8 @@ struct ONNXGemmOpLowering : public OpConversionPattern {
                else
                  createKrnl.copyToBuffer(bBuff, B, {k1, j1}, zeroVal, false);
                createKrnl.iterate({}, {jj2, ii2}, {}, {},
-                    [&](KrnlBuilder &createKrnl, ValueRange j2_i2_indices) {
+                    [&](const KrnlBuilder &createKrnl,
+                        ValueRange j2_i2_indices) {
                      Value j2(j2_i2_indices[0]), i2(j2_i2_indices[1]);
                      ArrayRef empty;
                      createKrnl.matmul(aBuff, {i1, k1}, bBuff, {k1, j1},
@@ -316,7 +318,8 @@ struct ONNXGemmOpLowering : public OpConversionPattern {
    // "not currently used ones" like ii here last. Gave an error when ii was
    // listed first.
    create.krnl.iterateIE({jj, kk, ii}, {jj1, kk1}, {zeroIE, zeroIE, zeroIE},
-        {J, K, I}, [&](KrnlBuilder &createKrnl, ValueRange j1_k1_indices) {
+        {J, K, I},
+        [&](const KrnlBuilder &createKrnl, ValueRange j1_k1_indices) {
          Value j1(j1_k1_indices[0]), k1(j1_k1_indices[1]);
          // If parallel, allocate on stack inside the parallel region.
          if (enableParallel) {
@@ -330,14 +333,15 @@ struct ONNXGemmOpLowering : public OpConversionPattern {
          else
            createKrnl.copyToBuffer(bBuff, B, {k1, j1}, zeroVal, false);
          createKrnl.iterateIE({}, {ii1}, {}, {},
-              [&](KrnlBuilder &createKrnl, ValueRange i1_index) {
+              [&](const KrnlBuilder &createKrnl, ValueRange i1_index) {
                Value i1(i1_index[0]);
                if (aTrans)
                  createKrnl.copyToBuffer(aBuff, A, {k1, i1}, zeroVal, true);
                else
                  createKrnl.copyToBuffer(aBuff, A, {i1, k1}, zeroVal, false);
                createKrnl.iterate({}, {jj2, ii2}, {}, {},
-                    [&](KrnlBuilder &createKrnl, ValueRange j2_i2_indices) {
+                    [&](const KrnlBuilder &createKrnl,
+                        ValueRange j2_i2_indices) {
                      Value j2(j2_i2_indices[0]), i2(j2_i2_indices[1]);
                      createKrnl.matmul(aBuff, {i1, k1}, bBuff, {k1, j1},
                          R, {z, z},
@@ -374,7 +378,7 @@ struct ONNXGemmOpLowering : public OpConversionPattern {
      }
    }
    create.krnl.iterateIE(outerLoops, outerLoops, {zeroIE, zeroIE}, {I, J},
-        [&](KrnlBuilder &createKrnl, ValueRange outerIndices) {
+        [&](const KrnlBuilder &createKrnl, ValueRange outerIndices) {
          // Handle alpha/beta coefficients.
          Value res = createKrnl.load(R, outerIndices);
          MathBuilder createMath(createKrnl);
diff --git a/src/Conversion/ONNXToKrnl/Math/Hardmax.cpp b/src/Conversion/ONNXToKrnl/Math/Hardmax.cpp
index e59b7e150b..69ae1153cd 100644
--- a/src/Conversion/ONNXToKrnl/Math/Hardmax.cpp
+++ b/src/Conversion/ONNXToKrnl/Math/Hardmax.cpp
@@ -51,7 +51,7 @@ static Value emitArgmax(ConversionPatternRewriter &rewriter, Location loc,
   ValueRange loopDef = create.krnl.defineLoops(rank);
   SmallVector lbs(rank, LitIE(0));
   create.krnl.iterateIE(loopDef, loopDef, lbs, inputUBS,
-      [&](KrnlBuilder &createKrnl, ValueRange inputLoopInd) {
+      [&](const KrnlBuilder &createKrnl, ValueRange inputLoopInd) {
        MultiDialectBuilder create(
            createKrnl);
        // Load the index of the current max value.
@@ -68,7 +68,7 @@ static Value emitArgmax(ConversionPatternRewriter &rewriter, Location loc,

        // Compare and update the index for the maximum value.
        Value gt = create.math.sgt(next, maxValue);
-        create.scf.ifThenElse(gt, [&](SCFBuilder &createSCF) {
+        create.scf.ifThenElse(gt, [&](const SCFBuilder &createSCF) {
          KrnlBuilder createKrnl(createSCF);
          createKrnl.store(inputLoopInd[axis], resMemRef, resLoopInd);
        });
@@ -120,7 +120,7 @@ struct ONNXHardmaxOpLowering : public OpConversionPattern {
   ValueRange loopDef = create.krnl.defineLoops(rank);
   SmallVector lbs(rank, LitIE(0));
   create.krnl.iterateIE(loopDef, loopDef, lbs, ubs,
-      [&](KrnlBuilder &createKrnl, ValueRange loopInd) {
+      [&](const KrnlBuilder &createKrnl, ValueRange loopInd) {
        MultiDialectBuilder create(
            createKrnl);
        // Load the index of the current max value.
@@ -132,13 +132,13 @@ struct ONNXHardmaxOpLowering : public OpConversionPattern {
        Value eq = create.math.eq(maxInd, loopInd[axis]);
        create.scf.ifThenElse(
            eq, /*then*/
-            [&](SCFBuilder &createSCF) {
+            [&](const SCFBuilder &createSCF) {
              MultiDialectBuilder create(createSCF);
              Value one = create.math.constant(elementType, 1);
              create.krnl.store(one, resMemRef, loopInd);
            },
            /*else*/
-            [&](SCFBuilder &createSCF) {
+            [&](const SCFBuilder &createSCF) {
              MultiDialectBuilder create(createSCF);
              Value zero = create.math.constant(elementType, 0);
              create.krnl.store(zero, resMemRef, loopInd);
diff --git a/src/Conversion/ONNXToKrnl/Math/LRN.cpp b/src/Conversion/ONNXToKrnl/Math/LRN.cpp
index aa30bc5b65..1b08661a2d 100644
--- a/src/Conversion/ONNXToKrnl/Math/LRN.cpp
+++ b/src/Conversion/ONNXToKrnl/Math/LRN.cpp
@@ -65,7 +65,7 @@ struct ONNXLRNOpLowering : public OpConversionPattern {
   SmallVector lbs(outputRank, LitIE(0));
   create.krnl.iterateIE(outputLoopDef, outputLoopDef, lbs,
      shapeHelper.getOutputDims(),
-      [&](KrnlBuilder &createKrnl, ValueRange outputLoopInd) {
+      [&](const KrnlBuilder &createKrnl, ValueRange outputLoopInd) {
        // Insert computation of square_sum.
        // square_sum[n, c, d1, ..., dk] = sum(X[n, i, d1, ..., dk] ^ 2),
        // where max(0, c - floor((size - 1) / 2)) <= i
diff --git a/src/Conversion/ONNXToKrnl/Math/MatMul.cpp b/src/Conversion/ONNXToKrnl/Math/MatMul.cpp
index 0e09c49680..ba16633e3c 100644
--- a/src/Conversion/ONNXToKrnl/Math/MatMul.cpp
+++ b/src/Conversion/ONNXToKrnl/Math/MatMul.cpp
@@ -86,14 +86,14 @@ struct ONNXMatMulOpLowering : public OpConversionPattern {

     // Non-reduction loop iterations: output-rank.
     create.krnl.iterateIE(loopDef, outerLoops, loopLbs, loopUbs,
-        [&](KrnlBuilder &createKrnl, ValueRange outerIndices) {
+        [&](const KrnlBuilder &createKrnl, ValueRange outerIndices) {
          MultiDialectBuilder create(
              createKrnl);
          ValueRange inits = ValueRange(fZero);
          // Inner loop for reduction.
          auto innerIterate = create.krnl.iterate({}, innerLoop, {}, {}, inits,
-              [&](KrnlBuilder &createKrnl, ValueRange innerIndex,
+              [&](const KrnlBuilder &createKrnl, ValueRange innerIndex,
                  ValueRange iterArgs) {
                // Get last argument for the iterate body.
                Value iterArg = iterArgs.back();
@@ -340,7 +340,7 @@ struct ONNXMatMulOpLowering : public OpConversionPattern {
      }
    }
    create.krnl.iterate({ii, jj, kk}, {ii1, jj1, kk1}, {zero, zero, zero},
-        {I, J, K}, [&](KrnlBuilder &createKrnl, ValueRange indices) {
+        {I, J, K}, [&](const KrnlBuilder &createKrnl, ValueRange indices) {
          Value i1(indices[0]), j1(indices[1]), k1(indices[2]);
          createKrnl.matmul(A, {zero, zero}, B, {zero, zero}, C, {zero, zero},
              {ii2, jj2, kk2}, {i1, j1, k1}, {I, J, K},
@@ -420,7 +420,7 @@ struct ONNXMatMulOpLowering : public OpConversionPattern {
      }
    }
    create.krnl.iterate(broadcastLoop, broadcastLoop, broadcastLB, broadcastUB,
-        [&](KrnlBuilder &createKrnl, ValueRange broadcastIndices) {
+        [&](const KrnlBuilder &createKrnl, ValueRange broadcastIndices) {
          MultiDialectBuilder create(createKrnl);
          // I, J, K loop.
          ValueRange origLoop = create.krnl.defineLoops(3);
@@ -436,7 +436,8 @@ struct ONNXMatMulOpLowering : public OpConversionPattern {
          create.krnl.permute(
              {ii1, ii2, jj1, jj2, kk1, kk2}, {0, 3, 1, 4, 2, 5});
          create.krnl.iterate({ii, jj, kk}, {ii1, jj1, kk1}, {zero, zero, zero},
-              {I, J, K}, [&](KrnlBuilder &createKrnl, ValueRange indices) {
+              {I, J, K},
+              [&](const KrnlBuilder &createKrnl, ValueRange indices) {
                Value i1(indices[0]), j1(indices[1]), k1(indices[2]);
                // Compute global start for B/C: {broadcastIndices, 0, 0}
                SmallVector broadcastGlobalStart;
diff --git a/src/Conversion/ONNXToKrnl/Math/Reduction.cpp b/src/Conversion/ONNXToKrnl/Math/Reduction.cpp
index e969ca8e85..4a30ac58ad 100644
--- a/src/Conversion/ONNXToKrnl/Math/Reduction.cpp
+++ b/src/Conversion/ONNXToKrnl/Math/Reduction.cpp
@@ -479,10 +479,8 @@ bool emitFullSIMDReductionFor(ConversionPatternRewriter &rewriter, Location loc,
   IndexExpr tNumIE = LitIE(tNum);
   IndexExpr blockSize = ub.ceilDiv(tNum);
   bool simdOnly = false; // Refine, but since we are chunking input, safer.
-  ValueRange loopDef = create.krnl.defineLoops(1);
-  create.krnl.parallel(loopDef[0]);
-  create.krnl.iterateIE(loopDef, loopDef, {zero}, {tNumIE},
-      [&](onnx_mlir::KrnlBuilder &ck, mlir::ValueRange loopInd) {
+  create.krnl.forLoopIE(zero, tNumIE, /*step*/ 1, /*par*/ true,
+      [&](const KrnlBuilder &ck, mlir::ValueRange loopInd) {
        IndexExprScope scope(ck);
        MDBuilder create(ck);
        IndexExpr t = DimIE(loopInd[0]);
@@ -839,9 +837,8 @@ struct ONNXReductionOpLowering : public OpConversionPattern {
      if (!axisShape0.isLiteral()) {
        // When axes is dynamic, generate a Krnl loop
        KrnlBuilder createKrnl(rewriter, loc);
-        ValueRange loopDef = createKrnl.defineLoops(1);
-        createKrnl.iterateIE(loopDef, loopDef, {LitIE(0)}, {axisShape0},
-            [&](KrnlBuilder &createKrnl, ValueRange loopInd) {
+        createKrnl.forLoopIE(LitIE(0), axisShape0, /*step*/ 1, /*par*/ false,
+            [&](const KrnlBuilder &createKrnl, ValueRange loopInd) {
              Value axe = createKrnl.load(axesVal, loopInd[0]);
              Value cond = create.math.slt(axe, zeroValue);
              Value dim = create.math.select(
@@ -975,7 +972,7 @@ struct ONNXReductionOpLowering : public OpConversionPattern {
    // TODO Temporary disable the 2nd loop parallelism, since its outermost
    // loop could be a reduction loop, where parallelism would not be safe.
    create.krnl.iterateIE(loop2Def, loop2Def, lbs2, ubs2,
-        [&](KrnlBuilder &kb, ValueRange loopInd) {
+        [&](const KrnlBuilder &kb, ValueRange loopInd) {
          MultiDialectBuilder create(kb);
          Value zeroIndex = create.math.constantIndex(0);
          // Compute accumulator access function.
@@ -1021,7 +1018,7 @@ struct ONNXReductionOpLowering : public OpConversionPattern {
      }
    }
    create.krnl.iterateIE(loop3Def, loop3Def, lbs3, ubs3,
-        [&](KrnlBuilder &kb, ValueRange loopInd) {
+        [&](const KrnlBuilder &kb, ValueRange loopInd) {
          MultiDialectBuilder create(kb);
          Value loadData = create.krnl.load(alloc, loopInd);
          Value meanVal = create.math.div(loadData, divisorForMean);
@@ -1153,7 +1150,7 @@ struct ONNXReductionOpLowering : public OpConversionPattern {
      }
    }
    create.krnl.iterateIE(outLoopDef, outLoopDef, lbs, flatOutDims,
-        [&](KrnlBuilder &ck, ValueRange outLoopInd) {
+        [&](const KrnlBuilder &ck, ValueRange outLoopInd) {
          MDBuilder create(ck);
          // Allocate temp inside loop (because of parallel).
          Value tmpAlloca = create.mem.alignedAlloca(tmpType);
@@ -1306,7 +1303,7 @@ struct ONNXReductionOpLowering : public OpConversionPattern {
      }
    }
    create.krnl.iterateIE(outLoopDef, optimizedOutLoopDef, lbs, flatOutDims,
-        [&](KrnlBuilder &ck, ValueRange blockedOutLoopInd) {
+        [&](const KrnlBuilder &ck, ValueRange blockedOutLoopInd) {
          MDBuilder create(ck);
          // Create temp inside loop (because of parallel).
          Value tmpBlockedAlloca = create.mem.alignedAlloca(tmpBlockedType);
@@ -1323,13 +1320,13 @@ struct ONNXReductionOpLowering : public OpConversionPattern {
          Value isNotFullVal = create.math.slt(isFull.getValue(), zero);
          create.scf.ifThenElse(
              isNotFullVal,
-              [&](SCFBuilder &scf) {
+              [&](const SCFBuilder &scf) {
                MDBuilder create(scf);
                // create.krnl.printf("partial tile\n");
                Value startOfLastBlockVal = blockedCurrIndex.getValue();
                Value blockedUBVal = blockedUB.getValue();
                create.scf.forLoop(startOfLastBlockVal, blockedUBVal, 1,
-                    [&](SCFBuilder &scf, ValueRange loopInd) {
+                    [&](const SCFBuilder &scf, ValueRange loopInd) {
                      MDBuilder create(scf);
                      Value blockLocalInd = loopInd[0];
                      // Output induction variables: same as the outer loop, but
@@ -1344,7 +1341,7 @@ struct ONNXReductionOpLowering : public OpConversionPattern {
                          simdUB, VL, simdOnly);
                    }); /* for inside blocked loop */
              },
-              [&](SCFBuilder &scf) {
+              [&](const SCFBuilder &scf) {
                MDBuilder create(scf);
                // create.krnl.printf("full tile\n");
                genVlHorizontalSimdReduction(rewriter, create, op, elementType,
diff --git a/src/Conversion/ONNXToKrnl/Math/Softmax.cpp b/src/Conversion/ONNXToKrnl/Math/Softmax.cpp
index 39fee40fb3..9e19f80d1d 100644
--- a/src/Conversion/ONNXToKrnl/Math/Softmax.cpp
+++ b/src/Conversion/ONNXToKrnl/Math/Softmax.cpp
@@ -33,7 +33,8 @@ static void emitInnerLoops(KrnlBuilder &createKrnl, int64_t numberOfLoops,
   // Compute the maximum value along axis.
   ValueRange maxLoops = createKrnl.defineLoops(numberOfLoops);
   auto maxLoop = createKrnl.iterateIE(maxLoops, maxLoops, Lbs, Ubs, maxInits,
-      [&](KrnlBuilder &createKrnl, ValueRange maxIndices, ValueRange iterArgs) {
+      [&](const KrnlBuilder &createKrnl, ValueRange maxIndices,
+          ValueRange iterArgs) {
        // Get last argument for the iterate body.
        Value iterArg = iterArgs.back();
@@ -67,7 +68,8 @@ static void emitInnerLoops(KrnlBuilder &createKrnl, int64_t numberOfLoops,
   // Compute the sum of all values along axis.
   ValueRange sumLoops = createKrnl.defineLoops(numberOfLoops);
   auto sumLoop = createKrnl.iterateIE(sumLoops, sumLoops, Lbs, Ubs, sumInits,
-      [&](KrnlBuilder &createKrnl, ValueRange sumIndices, ValueRange iterArgs) {
+      [&](const KrnlBuilder &createKrnl, ValueRange sumIndices,
+          ValueRange iterArgs) {
        // Get last argument for the iterate body.
        Value iterArg = iterArgs.back();
@@ -106,7 +108,7 @@ static void emitInnerLoops(KrnlBuilder &createKrnl, int64_t numberOfLoops,

   // Compute the softmax.
   ValueRange softmaxLoops = createKrnl.defineLoops(numberOfLoops);
   createKrnl.iterateIE(softmaxLoops, softmaxLoops, Lbs, Ubs,
-      [&](KrnlBuilder &createKrnl, ValueRange softmaxIndices) {
+      [&](const KrnlBuilder &createKrnl, ValueRange softmaxIndices) {
        MultiDialectBuilder create(createKrnl);
        IndexExprScope ieScope(createKrnl);

@@ -188,7 +190,7 @@ void emitInstForSoftmax(ConversionPatternRewriter &rewriter,
      }
    }
    create.krnl.iterateIE(outerLoops, outerLoops, outerLbs, outerUbs,
-        [&](KrnlBuilder &ck, ValueRange outerIndices) {
+        [&](const KrnlBuilder &ck, ValueRange outerIndices) {
          MultiDialectBuilder create(ck);

@@ -249,7 +251,7 @@ void emitInstForSoftmax(ConversionPatternRewriter &rewriter,

    // Emit outer loops.
    create.krnl.iterateIE(outerLoops, outerLoops, outerLbs, outerUbs,
-        [&](KrnlBuilder &ck, ValueRange outerIndices) {
+        [&](const KrnlBuilder &ck, ValueRange outerIndices) {
          MultiDialectBuilder create(ck);
          IndexExprScope ieScope(ck);
diff --git a/src/Conversion/ONNXToKrnl/Math/TopK.cpp b/src/Conversion/ONNXToKrnl/Math/TopK.cpp
index 96c6efafa9..358406637e 100644
--- a/src/Conversion/ONNXToKrnl/Math/TopK.cpp
+++ b/src/Conversion/ONNXToKrnl/Math/TopK.cpp
@@ -72,7 +72,7 @@ struct ONNXTopKOpLowering : public OpConversionPattern {
   SmallVector zeroDims(rank, LitIE(0));
   ValueRange loopDef = create.krnl.defineLoops(rank);
   create.krnl.iterateIE(loopDef, loopDef, zeroDims, resDims,
-      [&](KrnlBuilder &createKrnl, ValueRange resLoopInd) {
+      [&](const KrnlBuilder &createKrnl, ValueRange resLoopInd) {
        Value resInd = createKrnl.load(argSort, resLoopInd);
        SmallVector resIndexLoopInd(resLoopInd);
        resIndexLoopInd[axis] = resInd;
diff --git a/src/Conversion/ONNXToKrnl/Math/Trilu.cpp b/src/Conversion/ONNXToKrnl/Math/Trilu.cpp
index caa5a016e0..8a1c8dd062 100644
--- a/src/Conversion/ONNXToKrnl/Math/Trilu.cpp
+++ b/src/Conversion/ONNXToKrnl/Math/Trilu.cpp
@@ -65,7 +65,7 @@ struct ONNXTriluOpLowering : public OpConversionPattern {
   ValueRange loopDef = create.krnl.defineLoops(rank);
   SmallVector lbs(rank, LitIE(0));
   create.krnl.iterateIE(loopDef, loopDef, lbs, ubs,
-      [&](KrnlBuilder &createKrnl, ValueRange loopInd) {
+      [&](const KrnlBuilder &createKrnl, ValueRange loopInd) {
        MultiDialectBuilder create(
            createKrnl);
        Value i = create.math.add(k, loopInd[rank - 2]);
diff --git a/src/Conversion/ONNXToKrnl/NN/Conv.cpp b/src/Conversion/ONNXToKrnl/NN/Conv.cpp
index 41a1d0b5c0..980fe62c89 100644
--- a/src/Conversion/ONNXToKrnl/NN/Conv.cpp
+++ b/src/Conversion/ONNXToKrnl/NN/Conv.cpp
@@ -112,7 +112,7 @@ struct ONNXConvOpLowering : public OpConversionPattern {
     //     for wo = 0 .. WO:
     create.krnl.iterateIE(outputSpacialLoops, outputSpacialLoops,
        outputSpacialLbs, outputSpacialUbs,
-        [&](KrnlBuilder &createKrnl, ValueRange outputSpatialIndices) {
+        [&](const KrnlBuilder &createKrnl, ValueRange outputSpatialIndices) {
          IndexExprScope outputSpacialScope(createKrnl);
          MultiDialectBuilder
              create(createKrnl);
@@ -155,7 +155,7 @@ struct ONNXConvOpLowering : public OpConversionPattern {
          //       for kw in lb .. ub:
          auto innerIterate =
              create.krnl.iterateIE(redLoops, redLoops, redLbs, redUbs, inits,
-                  [&](KrnlBuilder &createKrnl, ValueRange redIndices,
+                  [&](const KrnlBuilder &createKrnl, ValueRange redIndices,
                      ValueRange iterArgs) {
                    // Get last argument for the iterate body.
                    Value iterArg = iterArgs.back();
@@ -230,7 +230,7 @@ struct ONNXConvOpLowering : public OpConversionPattern {
      }
    }
    create.krnl.iterateIE(outerLoops, outerLoops, outerLbs, outerUbs,
-        [&](KrnlBuilder &create, ValueRange outerIndices) {
+        [&](const KrnlBuilder &create, ValueRange outerIndices) {
          bodyFunction(outerIndices);
        });
   }
diff --git a/src/Conversion/ONNXToKrnl/NN/Normalization.cpp b/src/Conversion/ONNXToKrnl/NN/Normalization.cpp
index 1eed53603a..1fb77a5cc1 100644
--- a/src/Conversion/ONNXToKrnl/NN/Normalization.cpp
+++ b/src/Conversion/ONNXToKrnl/NN/Normalization.cpp
@@ -208,7 +208,7 @@ struct ONNXInstanceNormalizationOpLowering
   ValueRange n_c_loopDef = create.krnl.defineLoops(2);
   create.krnl.iterateIE(n_c_loopDef, n_c_loopDef, {iZero, iZero},
      {inputBounds[0], inputBounds[1]},
-      [&](KrnlBuilder &ck, ValueRange n_c_loopInd) {
+      [&](const KrnlBuilder &ck, ValueRange n_c_loopInd) {
        MultiDialectBuilder create(
            ck);
        IndexExprScope channelScope(ck);
@@ -228,7 +228,7 @@ struct ONNXInstanceNormalizationOpLowering
        // Iterate over kernel and add values.
        ValueRange spatial2_loopDef = create.krnl.defineLoops(rank - 2);
        create.krnl.iterateIE(spatial2_loopDef, spatial2_loopDef, lbs, ubs,
-            [&](KrnlBuilder &createKrnl, ValueRange spatial_loopInd) {
+            [&](const KrnlBuilder &createKrnl, ValueRange spatial_loopInd) {
              MultiDialectBuilder create(
                  createKrnl);
              SmallVector inputAccessFct = {
@@ -247,7 +247,7 @@ struct ONNXInstanceNormalizationOpLowering
        create.krnl.store(fZero, tmpMemRef);
        // Iterate over kernel and add values.
        create.krnl.iterateIE(spatial_loopDef, spatial_loopDef, lbs, ubs,
-            [&](KrnlBuilder &createKrnl, ValueRange spatial_loopInd) {
+            [&](const KrnlBuilder &createKrnl, ValueRange spatial_loopInd) {
              MultiDialectBuilder create(
                  createKrnl);
              SmallVector inputAccessFct = {
@@ -278,7 +278,7 @@ struct ONNXInstanceNormalizationOpLowering
        //   + term.
        ValueRange spatial3_loopDef = create.krnl.defineLoops(rank - 2);
        create.krnl.iterateIE(spatial3_loopDef, spatial3_loopDef, lbs, ubs,
-            [&](KrnlBuilder &createKrnl, ValueRange spatial_loopInd) {
+            [&](const KrnlBuilder &createKrnl, ValueRange spatial_loopInd) {
              MultiDialectBuilder create(
                  createKrnl);
              SmallVector accessFct = {n.getValue(), c.getValue()};
@@ -775,7 +775,7 @@ struct GenericLayerNormaOpLowering : public OpConversionPattern {
    // Perform reduction of entire vectors.
    IndexExpr izero = LitIE(0);
    create.affineKMem.forLoopIE(izero, redDim, totVL,
-        [&](onnx_mlir::AffineBuilderKrnlMem &ck, ValueRange loopInd) {
+        [&](const onnx_mlir::AffineBuilderKrnlMem &ck, ValueRange loopInd) {
          MDBuilder create(ck);
          Value j = loopInd[0];
          // load X, compute X**2, sum into reductions.
@@ -830,7 +830,7 @@ struct GenericLayerNormaOpLowering : public OpConversionPattern {
        });
    // Normalize of entire vectors.
    create.affineKMem.forLoopIE(izero, redDim, totVL,
-        [&](onnx_mlir::AffineBuilderKrnlMem &ck, ValueRange loopInd) {
+        [&](const onnx_mlir::AffineBuilderKrnlMem &ck, ValueRange loopInd) {
          MDBuilder create(ck);
          Value j = loopInd[0];
          // load X, compute X**2, sum into reductions.
@@ -941,17 +941,14 @@ struct GenericLayerNormaOpLowering : public OpConversionPattern {
        invStdDevFlatMemRef);
    // Alloc mem for reductions (should be private if parallel)
    MemRefType tmpRedType = MemRefType::get({B, totVL}, elementType);
-    // Iterate over 1st dim by block
-    ValueRange loopDefs = create.krnl.defineLoops(1);
-    IndexExpr zero = LitIE(0);
-    ValueRange blockedLoopDefs = create.krnl.block(loopDefs[0], B);
-    Value blockedLoopDef = blockedLoopDefs[0];
+    // Iterate over 1st dim by block B.
+    bool useParallel = false;
    if (enableParallel) {
      int64_t parId;
      SmallVector lb(1, LitIE(0)), ub(1, XFlatDims[0]);
      if (findSuitableParallelDimension(lb, ub, 0, 1, parId,
              /*min iter for going parallel*/ 4)) {
-        create.krnl.parallel(blockedLoopDef);
+        useParallel = true;
        onnxToKrnlParallelReport(op, true, 0, lb[0], ub[0], "in layer-norm");
      } else {
        onnxToKrnlParallelReport(
@@ -960,8 +957,8 @@ struct GenericLayerNormaOpLowering : public OpConversionPattern {
    } else {
      onnxToKrnlParallelReport(op, false, -1, -1, "no parallel in layer norm");
    }
-    create.krnl.iterateIE({loopDefs[0]}, {blockedLoopDef}, {zero},
-        {XFlatDims[0]}, [&](KrnlBuilder &ck, ValueRange blockedLoopIndices) {
+    create.krnl.forLoopIE(LitIE(0), XFlatDims[0], /*step*/ B, useParallel,
+        [&](const KrnlBuilder &ck, ValueRange blockedLoopIndices) {
          MDBuilder create(ck);
          IndexExprScope innerScope(ck);
          Value tmpRedMemRef = create.mem.alignedAlloca(tmpRedType);
@@ -974,13 +971,13 @@ struct GenericLayerNormaOpLowering : public OpConversionPattern {
          Value isNotFullVal = create.math.slt(isFull.getValue(), zero);
          create.scf.ifThenElse(
              isNotFullVal,
-              [&](SCFBuilder &scf) {
+              [&](const SCFBuilder &scf) {
                MDBuilder create(scf);
                // create.krnl.printf("partial tile\n");
                Value startOfLastBlockVal = blockedCurrIndex.getValue();
                Value blockedUBVal = blockedUB.getValue();
                create.scf.forLoop(startOfLastBlockVal, blockedUBVal, 1,
-                    [&](SCFBuilder &scf, ValueRange loopInd) {
+                    [&](const SCFBuilder &scf, ValueRange loopInd) {
                      MDBuilder create(scf);
                      Value blockLocalInd = loopInd[0];
                      generateIterWithSIMD(rewriter, create, lnOp, XFlatMemRef,
@@ -991,7 +988,7 @@ struct GenericLayerNormaOpLowering : public OpConversionPattern {
                          scaleModFactor, biasModFactor);
                    }); /* for inside blocked loop */
              },
-              [&](SCFBuilder &scf) {
+              [&](const SCFBuilder &scf) {
                MDBuilder create(scf);
                // create.krnl.printf("full tile\n");
                generateIterWithSIMD(rewriter, create, lnOp, XFlatMemRef,
diff --git a/src/Conversion/ONNXToKrnl/NN/Pooling.cpp b/src/Conversion/ONNXToKrnl/NN/Pooling.cpp
index 320c17a32a..f671192583 100644
--- a/src/Conversion/ONNXToKrnl/NN/Pooling.cpp
+++ b/src/Conversion/ONNXToKrnl/NN/Pooling.cpp
@@ -324,7 +324,7 @@ struct ONNXPoolOpLowering : public OpConversionPattern {
   SmallVector ubs;
   create.krnlIE.getShapeAsDims(alloc, ubs);
   create.krnl.iterateIE(calcLoopDef, calcLoopDef, lbs, ubs,
-      [&](KrnlBuilder &createKrnl, ValueRange loopInd) {
+      [&](const KrnlBuilder &createKrnl, ValueRange loopInd) {
        MultiDialectBuilder
            create(createKrnl);
diff --git a/src/Conversion/ONNXToKrnl/ONNXToKrnlCommon.cpp b/src/Conversion/ONNXToKrnl/ONNXToKrnlCommon.cpp
index f67be95176..4be0f7ecfc 100644
--- a/src/Conversion/ONNXToKrnl/ONNXToKrnlCommon.cpp
+++ b/src/Conversion/ONNXToKrnl/ONNXToKrnlCommon.cpp
@@ -354,7 +354,7 @@ Value emitArgSort(ConversionPatternRewriter &rewriter, Location loc,
   Value order = create.mem.alignedAlloc(type, ubs);
   ValueRange initLoopDef = create.krnl.defineLoops(rank);
   create.krnl.iterateIE(initLoopDef, initLoopDef, lbs, ubs,
-      [&](KrnlBuilder &createKrnl, ValueRange loopInd) {
ValueRange loopInd) { + [&](const KrnlBuilder &createKrnl, ValueRange loopInd) { // order[axis_0, axis_1, ..., axis_k-1, k, axis_k+1, ....] = k createKrnl.store(loopInd[axis], order, loopInd); }); @@ -376,11 +376,10 @@ Value emitArgSort(ConversionPatternRewriter &rewriter, Location loc, outerUbs[axis] = ubs[axis] - oneIE; ValueRange loopDef = create.krnl.defineLoops(rank); create.krnl.iterateIE(loopDef, loopDef, lbs, outerUbs, - [&](KrnlBuilder &createKrnl, ValueRange iLoopInd) { + [&](const KrnlBuilder &createKrnl, ValueRange iLoopInd) { IndexExpr i1 = DimIE(iLoopInd[axis]) + oneIE; - ValueRange swapLoopDef = createKrnl.defineLoops(1); - createKrnl.iterateIE(swapLoopDef, swapLoopDef, {i1}, {ubs[axis]}, - [&](KrnlBuilder &ck, ValueRange swapLoopInd) { + createKrnl.forLoopIE(i1, ubs[axis], /*step*/ 1, /*parallel*/ false, + [&](const KrnlBuilder &ck, ValueRange swapLoopInd) { MultiDialectBuilder create( ck); SmallVector kLoopInd(iLoopInd); @@ -402,7 +401,7 @@ Value emitArgSort(ConversionPatternRewriter &rewriter, Location loc, cond = create.math.sgt(x, y); else cond = create.math.slt(x, y); - create.scf.ifThenElse(cond, [&](SCFBuilder &createSCF) { + create.scf.ifThenElse(cond, [&](const SCFBuilder &createSCF) { KrnlBuilder createKrnl(createSCF); createKrnl.store(kOrd, order, iLoopInd); createKrnl.store(iOrd, order, kLoopInd); diff --git a/src/Conversion/ONNXToKrnl/ObjectDetection/NonMaxSuppression.cpp b/src/Conversion/ONNXToKrnl/ObjectDetection/NonMaxSuppression.cpp index 9cb63efbd6..87619358ff 100644 --- a/src/Conversion/ONNXToKrnl/ObjectDetection/NonMaxSuppression.cpp +++ b/src/Conversion/ONNXToKrnl/ObjectDetection/NonMaxSuppression.cpp @@ -119,7 +119,7 @@ static void suppressByScores(ConversionPatternRewriter &rewriter, Location loc, ValueRange bcLoopDef = create.krnl.defineLoops(2); create.krnl.iterate(bcLoopDef, bcLoopDef, {zero, zero}, {bs, cs}, - [&](KrnlBuilder &createKrnl, ValueRange bcLoopInd) { + [&](const KrnlBuilder &createKrnl, ValueRange bcLoopInd) { MultiDialectBuilder create( createKrnl); Value b(bcLoopInd[0]), c(bcLoopInd[1]); @@ -132,7 +132,7 @@ static void suppressByScores(ConversionPatternRewriter &rewriter, Location loc, // threshold. Counting is done per class. ValueRange sLoopDef = create.krnl.defineLoops(1); create.krnl.iterate(sLoopDef, sLoopDef, {zero}, {ss}, - [&](KrnlBuilder &createKrnl, ValueRange sLoopInd) { + [&](const KrnlBuilder &createKrnl, ValueRange sLoopInd) { Value s(sLoopInd[0]); MathBuilder createMath(createKrnl); @@ -175,7 +175,7 @@ static Value tryToUnflip( ValueRange loopDef = create.krnl.defineLoops(2); create.krnl.iterateIE(loopDef, loopDef, {zeroIE, zeroIE}, {bs, ss}, - [&](KrnlBuilder &createKrnl, ValueRange loopInd) { + [&](const KrnlBuilder &createKrnl, ValueRange loopInd) { MathBuilder createMath(createKrnl); DimIndexExpr b(loopInd[0]), s(loopInd[1]); // Load a bounding box. @@ -322,7 +322,7 @@ struct ONNXNonMaxSuppressionOpLowering create.mem.alloca(MemRefType::get({}, indexType)); ValueRange bcLoopDef = create.krnl.defineLoops(2); create.krnl.iterate(bcLoopDef, bcLoopDef, {zero, zero}, {bs, cs}, - [&](KrnlBuilder &createKrnl, ValueRange bcLoopInd) { + [&](const KrnlBuilder &createKrnl, ValueRange bcLoopInd) { MultiDialectBuilder create( createKrnl); // Keep track of the number of output boxes per class. @@ -340,7 +340,7 @@ struct ONNXNonMaxSuppressionOpLowering // Iterate in the descending order of scores.
ValueRange sLoopDef = create.krnl.defineLoops(1); create.krnl.iterate(sLoopDef, sLoopDef, {zero}, {ss}, - [&](KrnlBuilder &createKrnl, ValueRange sLoopInd) { + [&](const KrnlBuilder &createKrnl, ValueRange sLoopInd) { Value b(bcLoopInd[0]), c(bcLoopInd[1]), s(sLoopInd[0]); MultiDialectBuilder create( createKrnl); @@ -399,7 +399,7 @@ struct ONNXNonMaxSuppressionOpLowering // using IOU. ValueRange oLoopDef = create.krnl.defineLoops(1); create.krnl.iterate(oLoopDef, oLoopDef, {zero}, {ss}, - [&](KrnlBuilder &createKrnl, ValueRange oLoopInd) { + [&](const KrnlBuilder &createKrnl, ValueRange oLoopInd) { Value o(oLoopInd[0]); MathBuilder createMath(createKrnl); @@ -446,7 +446,7 @@ struct ONNXNonMaxSuppressionOpLowering ValueRange resLoopDef = create.krnl.defineLoops(2); create.krnl.iterate(resLoopDef, resLoopDef, {zero, zero}, {effectiveNSI, three}, - [&](KrnlBuilder &createKrnl, ValueRange resLoopInd) { + [&](const KrnlBuilder &createKrnl, ValueRange resLoopInd) { MathBuilder createMath(createKrnl); Value load = createKrnl.load(selectedMemRef, resLoopInd); Value res = createMath.cast(elementType, load); diff --git a/src/Conversion/ONNXToKrnl/Quantization/QuantizeLinear.cpp b/src/Conversion/ONNXToKrnl/Quantization/QuantizeLinear.cpp index 3c3143c4ad..d7af519ea9 100644 --- a/src/Conversion/ONNXToKrnl/Quantization/QuantizeLinear.cpp +++ b/src/Conversion/ONNXToKrnl/Quantization/QuantizeLinear.cpp @@ -111,7 +111,7 @@ void emitQuantizationLinearScalarParameters(ConversionPatternRewriter &rewriter, // non-quantized and quantized simd values, but then we also need to privatize // it, which is also not easy in this scheme. So ignore this for now. create.krnl.forLoopIE(simdLb, simdUb, 1, enableParallel, - [&](KrnlBuilder &kb, ValueRange loopInd) { + [&](const KrnlBuilder &kb, ValueRange loopInd) { MultiDialectBuilder create(kb); Value buffVal = create.krnl.loadIE(flatBuffer, {zero}, {loopInd[0]}); Value res = create.math.cast(quantizedElementType, buffVal); diff --git a/src/Conversion/ONNXToKrnl/RNN/GRU.cpp b/src/Conversion/ONNXToKrnl/RNN/GRU.cpp index b90fe14696..cc6d94d42a 100644 --- a/src/Conversion/ONNXToKrnl/RNN/GRU.cpp +++ b/src/Conversion/ONNXToKrnl/RNN/GRU.cpp @@ -452,7 +452,7 @@ void calculateState( // Do element-wise computations. Fuse them into a single nested loop. ValueRange loops = create.krnl.defineLoops(htRank); create.krnl.iterate(loops, loops, htLbs, htUbs, - [&](KrnlBuilder &createKrnl, ValueRange indices) { + [&](const KrnlBuilder &createKrnl, ValueRange indices) { MathBuilder createMath(createKrnl); IndexExprScope ieScope(createKrnl); Value bs(indices[0]), hs(indices[1]); @@ -541,7 +541,7 @@ void calculateState( // Emit rt and (rt (.) Ht-1). ValueRange loops1 = create.krnl.defineLoops(htRank); create.krnl.iterate(loops1, loops1, htLbs, htUbs, - [&](KrnlBuilder &createKrnl, ValueRange indices) { + [&](const KrnlBuilder &createKrnl, ValueRange indices) { MathBuilder createMath(createKrnl); IndexExprScope ieScope(createKrnl); Value bs(indices[0]), hs(indices[1]); @@ -574,7 +574,7 @@ void calculateState( // Do element-wise computations. Fuse them into a single nested loop. 
ValueRange loops2 = create.krnl.defineLoops(htRank); create.krnl.iterate(loops2, loops2, htLbs, htUbs, - [&](KrnlBuilder &createKrnl, ValueRange indices) { + [&](const KrnlBuilder &createKrnl, ValueRange indices) { MathBuilder createMath(createKrnl); IndexExprScope ieScope(createKrnl); Value bs(indices[0]), hs(indices[1]); diff --git a/src/Conversion/ONNXToKrnl/RNN/LSTM.cpp b/src/Conversion/ONNXToKrnl/RNN/LSTM.cpp index dceed2cb5e..49ae86408e 100644 --- a/src/Conversion/ONNXToKrnl/RNN/LSTM.cpp +++ b/src/Conversion/ONNXToKrnl/RNN/LSTM.cpp @@ -349,7 +349,7 @@ void calculateState( } ValueRange loops = create.krnl.defineLoops(htRank); create.krnl.iterate(loops, loops, htLbs, htUbs, - [&](KrnlBuilder &createKrnl, ValueRange indices) { + [&](const KrnlBuilder &createKrnl, ValueRange indices) { MathBuilder createMath(createKrnl); Value bs(indices[0]), hs(indices[1]); // Ht = f(Xt*(Wi^T) + Ht-1*(Ri^T) + Wbi + Rbi) diff --git a/src/Conversion/ONNXToKrnl/RNN/RNNBase.cpp b/src/Conversion/ONNXToKrnl/RNN/RNNBase.cpp index 797f56953e..c21930a7c7 100644 --- a/src/Conversion/ONNXToKrnl/RNN/RNNBase.cpp +++ b/src/Conversion/ONNXToKrnl/RNN/RNNBase.cpp @@ -95,7 +95,7 @@ void initializeIntermediateStates(ConversionPatternRewriter &rewriter, SmallVector ubs; create.krnlIE.getShapeAsDims(boundVal, ubs); create.krnl.iterateIE(loopDef, loopDef, lbs, ubs, - [&](KrnlBuilder &createKrnl, ValueRange loopInd) { + [&](const KrnlBuilder &createKrnl, ValueRange loopInd) { SmallVector IVs; IVs.emplace_back(loopInd[0]); IVs.emplace_back(loopInd[1]); @@ -193,7 +193,7 @@ void initializeHiddenAndCell(ConversionPatternRewriter &rewriter, Location loc, } ValueRange loops = create.krnl.defineLoops(htRank); create.krnl.iterate(loops, loops, htLbs, htUbs, - [&](KrnlBuilder &createKrnl, ValueRange indices) { + [&](const KrnlBuilder &createKrnl, ValueRange indices) { Value hiddenVal = zero; if (!isNoneValue(initialH)) hiddenVal = createKrnl.load(initialH, indices); @@ -232,7 +232,7 @@ void stateToOutputForHiddenOrCell(ConversionPatternRewriter &rewriter, } ValueRange loops = create.krnl.defineLoops(2); create.krnl.iterate(loops, loops, lbs, ubs, - [&](KrnlBuilder &createKrnl, ValueRange indices) { + [&](const KrnlBuilder &createKrnl, ValueRange indices) { Value b(indices[0]), h(indices[1]); // Forward. Value val = createKrnl.load(forwardVal, {b, h}); @@ -275,8 +275,8 @@ Value emitXSliceAt(ConversionPatternRewriter &rewriter, Location loc, Value X, ubs.emplace_back(create.mem.dim(sliceX, r)); } ValueRange loops = create.krnl.defineLoops(2); - create.krnl.iterate( - loops, loops, lbs, ubs, [&](KrnlBuilder &createKrnl, ValueRange indices) { + create.krnl.iterate(loops, loops, lbs, ubs, + [&](const KrnlBuilder &createKrnl, ValueRange indices) { Value b(indices[0]), i(indices[1]); Value val = createKrnl.load(X, {timestepIV, b, i}); createKrnl.store(val, sliceX, {b, i}); @@ -289,9 +289,10 @@ Value emitXSliceAt(ConversionPatternRewriter &rewriter, Location loc, Value X, // When a sample reaches the limit of its sequence len, nextHt will be padded // with 0 (or initialH), and Ht will keep the last value at the sequence end // so that the final value Ht is the last value at their sequence len.
-Value handleSequenceLens(KrnlBuilder &createKrnl, MathBuilder &createMath, - Value sequenceLens, Value initialH, Value nextHt, Value sequenceIV, - Value directionIV, Value bs, Value hs, Value Ht) { +Value handleSequenceLens(const KrnlBuilder &createKrnl, + const MathBuilder &createMath, Value sequenceLens, Value initialH, + Value nextHt, Value sequenceIV, Value directionIV, Value bs, Value hs, + Value Ht) { if (!isNoneValue(sequenceLens)) { Value sequenceUB = createKrnl.load(sequenceLens, {bs}); Value initial; diff --git a/src/Conversion/ONNXToKrnl/RNN/RNNBase.hpp b/src/Conversion/ONNXToKrnl/RNN/RNNBase.hpp index d9177a5642..607bd77bea 100644 --- a/src/Conversion/ONNXToKrnl/RNN/RNNBase.hpp +++ b/src/Conversion/ONNXToKrnl/RNN/RNNBase.hpp @@ -65,10 +65,10 @@ mlir::Value emitXSliceAt(mlir::ConversionPatternRewriter &rewriter, // When a sample reaches the limit of its sequence len, nextHt will be padded // with 0 (or initialH), and Ht will keep the last value at the sequence end // so that the final value Ht is the last value at their sequence len. -mlir::Value handleSequenceLens(KrnlBuilder &createKrnl, MathBuilder &createMath, - mlir::Value sequenceLens, mlir::Value initialH, mlir::Value nextHt, - mlir::Value sequenceIV, mlir::Value directionIV, mlir::Value bs, - mlir::Value hs, mlir::Value Ht); +mlir::Value handleSequenceLens(const KrnlBuilder &createKrnl, + const MathBuilder &createMath, mlir::Value sequenceLens, + mlir::Value initialH, mlir::Value nextHt, mlir::Value sequenceIV, + mlir::Value directionIV, mlir::Value bs, mlir::Value hs, mlir::Value Ht); // Override the following methods when lowering an RNN operation: // - hasAllNoneOutput @@ -160,15 +160,14 @@ struct ONNXRNNOpLowering : public mlir::OpConversionPattern { if (direction == FORWARD || direction == BIDIRECTIONAL) { IndexExprScope childScope(create.krnl); - mlir::ValueRange loopDef = create.krnl.defineLoops(1); - llvm::SmallVector lbs(1, LitIE(0)); - llvm::SmallVector ubs; + IndexExpr lb = LitIE(0); + IndexExpr ub; if (!mlir::ShapedType::isDynamic(sequenceDimSize)) - ubs.emplace_back(LitIE(sequenceDimSize)); + ub = LitIE(sequenceDimSize); else - ubs.emplace_back(create.krnlIE.getShapeAsDim(X, 0)); - create.krnl.iterateIE(loopDef, loopDef, lbs, ubs, - [&](KrnlBuilder &createKrnl, mlir::ValueRange loopInd) { + ub = create.krnlIE.getShapeAsDim(X, 0); + create.krnl.forLoopIE(lb, ub, /*step*/ 1, /*par*/ false, + [&](const KrnlBuilder &createKrnl, mlir::ValueRange loopInd) { MathBuilder createMath(createKrnl); mlir::Value directionIV = createMath.constant(rewriter.getIndexType(), 0); @@ -185,15 +184,14 @@ struct ONNXRNNOpLowering : public mlir::OpConversionPattern { if (direction == REVERSE || direction == BIDIRECTIONAL) { IndexExprScope childScope(create.krnl); - mlir::ValueRange loopDef = create.krnl.defineLoops(1); - llvm::SmallVector lbs(1, LitIE(0)); - llvm::SmallVector ubs; + IndexExpr lb = LitIE(0); + IndexExpr ub; if (!mlir::ShapedType::isDynamic(sequenceDimSize)) - ubs.emplace_back(LitIE(sequenceDimSize)); + ub = LitIE(sequenceDimSize); else - ubs.emplace_back(create.krnlIE.getShapeAsDim(X, 0)); - create.krnl.iterateIE(loopDef, loopDef, lbs, ubs, - [&](KrnlBuilder &ck, mlir::ValueRange loopInd) { + ub = create.krnlIE.getShapeAsDim(X, 0); + create.krnl.forLoopIE(lb, ub, /*step*/ 1, /*par*/ false, + [&](const KrnlBuilder &ck, mlir::ValueRange loopInd) { MultiDialectBuilder create(ck); mlir::AffineMap reverseIVMap = mlir::AffineMap::get(1, 1, diff --git a/src/Conversion/ONNXToKrnl/Sequence/SequenceErase.cpp
b/src/Conversion/ONNXToKrnl/Sequence/SequenceErase.cpp index ecf21568ab..5657d0555e 100644 --- a/src/Conversion/ONNXToKrnl/Sequence/SequenceErase.cpp +++ b/src/Conversion/ONNXToKrnl/Sequence/SequenceErase.cpp @@ -64,13 +64,8 @@ struct ONNXSequenceEraseOpLowering // Copy the elements before the position KrnlBuilder createKrnl(rewriter, loc); - SmallVector lbs; - lbs.emplace_back(LitIE(0)); - SmallVector ubs; - ubs.emplace_back(positionIE); - ValueRange firstLoopDef = createKrnl.defineLoops(1); - createKrnl.iterateIE(firstLoopDef, firstLoopDef, lbs, ubs, - [&](KrnlBuilder createKrnl, ValueRange indicesLoopInd) { + createKrnl.forLoopIE(LitIE(0), positionIE, /*step*/ 1, /*par*/ false, + [&](const KrnlBuilder createKrnl, ValueRange indicesLoopInd) { Value element = createKrnl.load(adaptor.getInputSequence(), indicesLoopInd[0]); createKrnl.seqstore(element, alloc, positionIE); @@ -78,13 +73,8 @@ struct ONNXSequenceEraseOpLowering }); // Copy the elements after the position - SmallVector lbs1; - lbs1.emplace_back(positionIE + 1); - SmallVector ubs1; - ubs1.emplace_back(boundIE); - ValueRange secondLoopDef = createKrnl.defineLoops(1); - createKrnl.iterateIE(secondLoopDef, secondLoopDef, lbs1, ubs1, - [&](KrnlBuilder createKrnl, ValueRange indicesLoopInd) { + createKrnl.forLoopIE(positionIE + 1, boundIE, /*step*/ 1, /*par*/ false, + [&](const KrnlBuilder createKrnl, ValueRange indicesLoopInd) { Value element = createKrnl.load(adaptor.getInputSequence(), indicesLoopInd[0]); Value oneIndex = create.math.constantIndex(1); diff --git a/src/Conversion/ONNXToKrnl/Sequence/SequenceInsert.cpp b/src/Conversion/ONNXToKrnl/Sequence/SequenceInsert.cpp index 1840cc97be..806dbb71d3 100644 --- a/src/Conversion/ONNXToKrnl/Sequence/SequenceInsert.cpp +++ b/src/Conversion/ONNXToKrnl/Sequence/SequenceInsert.cpp @@ -77,13 +77,8 @@ struct ONNXSequenceInsertOpLowering // compilation problem due to the unranked tensor even though // the loop will not be reached at runtime. 
} else { - SmallVector lbs; - lbs.emplace_back(LitIE(0)); - SmallVector ubs; - ubs.emplace_back(positionIE); - ValueRange firstLoopDef = createKrnl.defineLoops(1); - createKrnl.iterateIE(firstLoopDef, firstLoopDef, lbs, ubs, - [&](KrnlBuilder createKrnl, ValueRange indicesLoopInd) { + createKrnl.forLoopIE(LitIE(0), positionIE, /*step*/ 1, /*par*/ false, + [&](const KrnlBuilder createKrnl, ValueRange indicesLoopInd) { auto element = createKrnl.load(adaptor.getInputSequence(), indicesLoopInd[0]); createKrnl.seqstore(element, alloc, positionIE); @@ -91,13 +86,8 @@ struct ONNXSequenceInsertOpLowering }); // Copy the elements after the position - SmallVector lbs1; - lbs1.emplace_back(positionIE + 1); - SmallVector ubs1; - ubs1.emplace_back(boundIE); - ValueRange secondLoopDef = createKrnl.defineLoops(1); - createKrnl.iterateIE(secondLoopDef, secondLoopDef, lbs1, ubs1, - [&](KrnlBuilder createKrnl, ValueRange indicesLoopInd) { + createKrnl.forLoopIE(positionIE + 1, boundIE, /*step*/ 1, /*par*/ false, + [&](const KrnlBuilder createKrnl, ValueRange indicesLoopInd) { auto element = createKrnl.load(adaptor.getInputSequence(), indicesLoopInd[0]); auto oneIndex = create.math.constantIndex(1); diff --git a/src/Conversion/ONNXToKrnl/Tensor/ArgMinMax.cpp b/src/Conversion/ONNXToKrnl/Tensor/ArgMinMax.cpp index be0d50a112..3ed51b37a9 100644 --- a/src/Conversion/ONNXToKrnl/Tensor/ArgMinMax.cpp +++ b/src/Conversion/ONNXToKrnl/Tensor/ArgMinMax.cpp @@ -100,7 +100,7 @@ struct ONNXArgMinMaxOpLowering : public OpConversionPattern { ValueRange initLoopDef = create.krnl.defineLoops(reducedRank); SmallVector initLbs(reducedRank, LitIE(0)); create.krnl.iterateIE(initLoopDef, initLoopDef, initLbs, outputDims, - [&](KrnlBuilder &createKrnl, ValueRange loopInd) { + [&](const KrnlBuilder &createKrnl, ValueRange loopInd) { createKrnl.store(minusOne, alloc, loopInd); }); @@ -110,7 +110,7 @@ struct ONNXArgMinMaxOpLowering : public OpConversionPattern { SmallVector ubs; create.krnlIE.getShapeAsDims(data, ubs); create.krnl.iterateIE(calcLoopDef, calcLoopDef, lbs, ubs, - [&](KrnlBuilder &createKrnl, ValueRange loopInd) { + [&](const KrnlBuilder &createKrnl, ValueRange loopInd) { // Handle the operation: SmallVector inLoopIVs, outLoopIVs, dstLoopIVs; diff --git a/src/Conversion/ONNXToKrnl/Tensor/Compress.cpp b/src/Conversion/ONNXToKrnl/Tensor/Compress.cpp index 13546b0ebf..5d8c08f849 100644 --- a/src/Conversion/ONNXToKrnl/Tensor/Compress.cpp +++ b/src/Conversion/ONNXToKrnl/Tensor/Compress.cpp @@ -60,9 +60,8 @@ struct ONNXCompressOpLowering : public OpConversionPattern { // Now create a loop to iterate over all conditions. Value condMemRef = adaptor.getCondition(); IndexExpr condShapeFirstRank = create.krnlIE.getShapeAsDim(condMemRef, 0); - ValueRange loopDef = create.krnl.defineLoops(1); - create.krnl.iterateIE(loopDef, loopDef, {zeroIE}, {condShapeFirstRank}, - [&](KrnlBuilder createKrnl, ValueRange loopInd) { + create.krnl.forLoopIE(zeroIE, condShapeFirstRank, /*step*/ 1, /*par*/ false, + [&](const KrnlBuilder createKrnl, ValueRange loopInd) { MathBuilder createMath(createKrnl); // Load the condition Value currCond = createKrnl.load(condMemRef, loopInd); // Type i1. 
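The Compress hunk above shows the other recurring rewrite in this patch: a one-dimensional defineLoops/iterateIE pair collapses into a single forLoopIE call. Schematically, with hypothetical IndexExpr bounds `lbIE` and `ubIE` and an in-scope MultiDialectBuilder `create`:

// Before: define a loop, then iterate over it.
ValueRange loopDef = create.krnl.defineLoops(1);
create.krnl.iterateIE(loopDef, loopDef, {lbIE}, {ubIE},
    [&](const KrnlBuilder &ck, ValueRange loopInd) { /* body */ });
// After: one call; the last flag requests a parallel loop when true.
create.krnl.forLoopIE(lbIE, ubIE, /*step*/ 1, /*parallel*/ false,
    [&](const KrnlBuilder &ck, ValueRange loopInd) { /* body */ });

The layer-norm hunk earlier uses the same call with the parallel flag computed at lowering time, which is how the explicit block/parallel bookkeeping disappears there.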
@@ -148,19 +147,19 @@ struct ONNXCompressOpLowering : public OpConversionPattern { ValueRange inputLoopDef = create.krnl.defineLoops(inputRank); create.krnl.iterateIE(inputLoopDef, inputLoopDef, inputLbs, inputUbs, - [&](KrnlBuilder createKrnl, ValueRange inputLoopInd) { + [&](const KrnlBuilder createKrnl, ValueRange inputLoopInd) { MultiDialectBuilder create( createKrnl); Value readIndex = create.krnl.load(readIndexMemRef); Value inBound = trueVal; if (!skipCond) inBound = create.math.slt(readIndex, condUb); - create.scf.ifThenElse(inBound, [&](SCFBuilder &createSCF) { + create.scf.ifThenElse(inBound, [&](const SCFBuilder &createSCF) { MultiDialectBuilder create( createSCF); Value currCond = create.krnl.load(condMemRef, {readIndex}); Value copy = create.math.neq(currCond, falseVal); - create.scf.ifThenElse(copy, [&](SCFBuilder &createSCF) { + create.scf.ifThenElse(copy, [&](const SCFBuilder &createSCF) { MultiDialectBuilder create(createSCF); Value val = create.krnl.load(inputMemRef, inputLoopInd); // Copy to output. @@ -215,10 +214,9 @@ struct ONNXCompressOpLowering : public OpConversionPattern { innerLbs.emplace_back(inputLbs[i]); innerUbs.emplace_back(inputUbs[i]); } - ValueRange axisLoopDef = create.krnl.defineLoops(1); - create.krnl.iterateIE(axisLoopDef, axisLoopDef, {inputLbs[axisValue]}, - {inputUbs[axisValue]}, - [&](KrnlBuilder createKrnl, ValueRange axisLoopInd) { + create.krnl.forLoopIE(inputLbs[axisValue], inputUbs[axisValue], + /*step*/ 1, /*par*/ false, + [&](const KrnlBuilder createKrnl, ValueRange axisLoopInd) { MultiDialectBuilder create( createKrnl); // Compute the test if we have enough condition value for current @@ -227,12 +225,12 @@ struct ONNXCompressOpLowering : public OpConversionPattern { Value inBound = trueVal; if (!skipCond) inBound = create.math.slt(readIndex, condUb); - create.scf.ifThenElse(inBound, [&](SCFBuilder &createSCF) { + create.scf.ifThenElse(inBound, [&](const SCFBuilder &createSCF) { MultiDialectBuilder create( createSCF); Value currCond = create.krnl.load(condMemRef, {readIndex}); Value copy = create.math.neq(currCond, falseVal); - create.scf.ifThenElse(copy, [&](SCFBuilder &createSCF) { + create.scf.ifThenElse(copy, [&](const SCFBuilder &createSCF) { KrnlBuilder createKrnl(createSCF); // Load the write index. Value writeIndex = createKrnl.load(writeIndexMemRef); @@ -240,7 +238,7 @@ struct ONNXCompressOpLowering : public OpConversionPattern { ValueRange innerLoopDefs = createKrnl.defineLoops(innerRank); createKrnl.iterateIE(innerLoopDefs, innerLoopDefs, innerLbs, innerUbs, - [&](KrnlBuilder createKrnl, ValueRange innerLoopInd) { + [&](const KrnlBuilder createKrnl, ValueRange innerLoopInd) { MathBuilder createMath(createKrnl); // Compute access functions for input and output. SmallVector inputAccessFct, outputAccessFct; diff --git a/src/Conversion/ONNXToKrnl/Tensor/Concat.cpp b/src/Conversion/ONNXToKrnl/Tensor/Concat.cpp index ca98ed6a8b..40fc1c9d92 100644 --- a/src/Conversion/ONNXToKrnl/Tensor/Concat.cpp +++ b/src/Conversion/ONNXToKrnl/Tensor/Concat.cpp @@ -101,7 +101,7 @@ struct ONNXConcatOpLowering : public OpConversionPattern { } } create.krnl.iterateIE(loopDef, loopDef, lbs, commonUB, - [&](KrnlBuilder &createKrnl, ValueRange loopInd) { + [&](const KrnlBuilder &createKrnl, ValueRange loopInd) { // Indices for the read and write. 
SmallVector readIndices, writeIndices; for (unsigned int r = 0; r < rank; ++r) { diff --git a/src/Conversion/ONNXToKrnl/Tensor/ConcatShapeTranspose.cpp b/src/Conversion/ONNXToKrnl/Tensor/ConcatShapeTranspose.cpp index 070d3e7273..aa86fbbec3 100644 --- a/src/Conversion/ONNXToKrnl/Tensor/ConcatShapeTranspose.cpp +++ b/src/Conversion/ONNXToKrnl/Tensor/ConcatShapeTranspose.cpp @@ -140,7 +140,7 @@ struct ONNXConcatShapeTransposeOpLowering // For each input, only the dimension 'axis' is different commonUB[axis] = ubs[axis]; create.krnl.iterateIE(loopDef, loopDef, lbs, commonUB, - [&](KrnlBuilder &createKrnl, ValueRange loopInd) { + [&](const KrnlBuilder &createKrnl, ValueRange loopInd) { // Indices for the read and write. SmallVector readIndices, writeIndices; for (unsigned int r = 0; r < rank; ++r) { diff --git a/src/Conversion/ONNXToKrnl/Tensor/ConstantOfShape.cpp b/src/Conversion/ONNXToKrnl/Tensor/ConstantOfShape.cpp index ab7003d2d6..ebce30ce5a 100644 --- a/src/Conversion/ONNXToKrnl/Tensor/ConstantOfShape.cpp +++ b/src/Conversion/ONNXToKrnl/Tensor/ConstantOfShape.cpp @@ -84,7 +84,7 @@ struct ONNXConstantOfShapeOpLowering SmallVector ubs; create.krnlIE.getShapeAsDims(alloc, ubs); create.krnl.iterateIE(loopDef, loopDef, lbs, ubs, - [&](KrnlBuilder &createKrnl, ValueRange loopInd) { + [&](const KrnlBuilder &createKrnl, ValueRange loopInd) { createKrnl.store(constantVal, alloc, loopInd); }); } else diff --git a/src/Conversion/ONNXToKrnl/Tensor/Expand.cpp b/src/Conversion/ONNXToKrnl/Tensor/Expand.cpp index b75a94e5a6..ae93d5ba9e 100644 --- a/src/Conversion/ONNXToKrnl/Tensor/Expand.cpp +++ b/src/Conversion/ONNXToKrnl/Tensor/Expand.cpp @@ -56,7 +56,7 @@ struct ONNXExpandOpLowering : public OpConversionPattern { SmallVector lbs(outputRank, zeroIE); create.krnl.iterateIE(outputLoopDef, outputLoopDef, lbs, shapeHelper.getOutputDims(), - [&](KrnlBuilder &createKrnl, ValueRange outputLoopInd) { + [&](const KrnlBuilder &createKrnl, ValueRange outputLoopInd) { IndexExprScope outputScope(createKrnl, shapeHelper.getScope()); SmallVector outputLoopIndices, lhsAccessExprs; getIndexExprList(outputLoopInd, outputLoopIndices); diff --git a/src/Conversion/ONNXToKrnl/Tensor/Gather.cpp b/src/Conversion/ONNXToKrnl/Tensor/Gather.cpp index 35cb8c4f7e..58227a2b69 100644 --- a/src/Conversion/ONNXToKrnl/Tensor/Gather.cpp +++ b/src/Conversion/ONNXToKrnl/Tensor/Gather.cpp @@ -102,7 +102,7 @@ struct ONNXGatherOpLowering : public OpConversionPattern { } } create.krnl.iterateIE(loopDef, loopDef, lbs, shapeHelper.getOutputDims(), - [&](KrnlBuilder &createKrnl, ValueRange loopInd) { + [&](const KrnlBuilder &createKrnl, ValueRange loopInd) { // Insert code inside the loop. IndexExprScope innerLoopScope(createKrnl); SymbolIndexExpr axisDim(dataDims[axisLit]); diff --git a/src/Conversion/ONNXToKrnl/Tensor/GatherElements.cpp b/src/Conversion/ONNXToKrnl/Tensor/GatherElements.cpp index 0452b983df..0f3b5c24e2 100644 --- a/src/Conversion/ONNXToKrnl/Tensor/GatherElements.cpp +++ b/src/Conversion/ONNXToKrnl/Tensor/GatherElements.cpp @@ -75,7 +75,7 @@ struct ONNXGatherElementsOpLowering ValueRange loopDef = create.krnl.defineLoops(indicesRank); DimsExpr lbs(indicesRank, LitIE(0)); create.krnl.iterateIE(loopDef, loopDef, lbs, indicesDims, - [&](KrnlBuilder &createKrnl, ValueRange loopInd) { + [&](const KrnlBuilder &createKrnl, ValueRange loopInd) { // Insert code inside the loop. 
IndexExprScope innerLoopScope(createKrnl); diff --git a/src/Conversion/ONNXToKrnl/Tensor/GatherND.cpp b/src/Conversion/ONNXToKrnl/Tensor/GatherND.cpp index 05551e66e3..5adc92e74b 100644 --- a/src/Conversion/ONNXToKrnl/Tensor/GatherND.cpp +++ b/src/Conversion/ONNXToKrnl/Tensor/GatherND.cpp @@ -32,7 +32,7 @@ struct ONNXGatherNDOpLowering : public OpConversionPattern { // Debug function used to emit code to print the supplied 'indices'. static void printIndices( - StringRef title, const DimsExpr &indices, KrnlBuilder &createKrnl) { + StringRef title, const DimsExpr &indices, const KrnlBuilder &createKrnl) { llvm::Twine msg(title + ": ("); createKrnl.printf(msg.str()); int64_t n = static_cast(indices.size()); @@ -141,7 +141,7 @@ struct ONNXGatherNDOpLowering : public OpConversionPattern { } create.krnl.iterateIE(loopDef, loopDef, lbs, ubs, - [&](KrnlBuilder &createKrnl, ValueRange loopInd) { + [&](const KrnlBuilder &createKrnl, ValueRange loopInd) { // Insert code inside the loop. IndexExprScope innerLoopScope(createKrnl); @@ -212,7 +212,7 @@ struct ONNXGatherNDOpLowering : public OpConversionPattern { Value last = reshapedDataLastDimExpr.getValue(); ValueRange innerLoopDef = create.krnl.defineLoops(1); create.krnl.iterate(innerLoopDef, innerLoopDef, {zero}, {last}, - [&](KrnlBuilder &createKrnl, ValueRange innerLoopInd) { + [&](const KrnlBuilder &createKrnl, ValueRange innerLoopInd) { IndexExpr ind = SymIE(innerLoopInd[0]); reshapedDataAccessFct.emplace_back(ind); assert(static_cast(reshapedDataAccessFct.size()) == diff --git a/src/Conversion/ONNXToKrnl/Tensor/NonZero.cpp b/src/Conversion/ONNXToKrnl/Tensor/NonZero.cpp index 63f4eb0d4b..2ef0d0ac91 100644 --- a/src/Conversion/ONNXToKrnl/Tensor/NonZero.cpp +++ b/src/Conversion/ONNXToKrnl/Tensor/NonZero.cpp @@ -130,7 +130,7 @@ struct ONNXNonZeroOpLowering : public OpConversionPattern { ValueRange initLoopDef = create.krnl.defineLoops(1); create.krnl.iterate(initLoopDef, initLoopDef, {iZero}, {xBound.getValue()}, - [&](KrnlBuilder &createKrnl, ValueRange loopInd) { + [&](const KrnlBuilder &createKrnl, ValueRange loopInd) { createKrnl.store(iZero, alloc, loopInd); }); rsumMemRefs.emplace_back(alloc); @@ -140,7 +140,7 @@ struct ONNXNonZeroOpLowering : public OpConversionPattern { // the reduction sum for each dimension. 
ValueRange rsumLoopDef = create.krnl.defineLoops(xMemRefType.getRank()); create.krnl.iterateIE(rsumLoopDef, rsumLoopDef, xLbs, xUbs, - [&](KrnlBuilder &createKrnl, ValueRange loopInd) { + [&](const KrnlBuilder &createKrnl, ValueRange loopInd) { MathBuilder createMath(createKrnl); Value x = createKrnl.load(X, loopInd); Value eqCond = createMath.eq(x, zero); @@ -180,7 +180,7 @@ struct ONNXNonZeroOpLowering : public OpConversionPattern { Value sum = create.mem.alloca(MemRefType::get({}, indexTy)); ValueRange iLoopDef = create.krnl.defineLoops(1); create.krnl.iterate(iLoopDef, iLoopDef, {iZero}, {numberOfZeros}, - [&](KrnlBuilder &ck, ValueRange iLoopInd) { + [&](const KrnlBuilder &ck, ValueRange iLoopInd) { MultiDialectBuilder create(ck); @@ -197,7 +197,7 @@ struct ONNXNonZeroOpLowering : public OpConversionPattern { ValueRange jLoopDef = create.krnl.defineLoops(1); create.krnl.iterate(jLoopDef, jLoopDef, {iZero}, {rsumBounds0.getValue()}, - [&](KrnlBuilder &createKrnl, ValueRange jLoopInd) { + [&](const KrnlBuilder &createKrnl, ValueRange jLoopInd) { MathBuilder createMath(createKrnl); Value j(jLoopInd[0]); Value o = createKrnl.load(rsumMemRefs[axis], {j}); diff --git a/src/Conversion/ONNXToKrnl/Tensor/OneHot.cpp b/src/Conversion/ONNXToKrnl/Tensor/OneHot.cpp index bf55e3636f..574856ab17 100644 --- a/src/Conversion/ONNXToKrnl/Tensor/OneHot.cpp +++ b/src/Conversion/ONNXToKrnl/Tensor/OneHot.cpp @@ -62,7 +62,8 @@ struct ONNXOneHotOpLowering : public OpConversionPattern { create.krnlIE.getShapeAsDims(indices, indicesUbs); ValueRange indicesLoopDef = create.krnl.defineLoops(indicesRank); create.krnl.iterateIE(indicesLoopDef, indicesLoopDef, indicesLbs, - indicesUbs, [&](KrnlBuilder createKrnl, ValueRange indicesLoopInd) { + indicesUbs, + [&](const KrnlBuilder createKrnl, ValueRange indicesLoopInd) { // Loop for all input values. MathBuilder createMath(createKrnl); // Input val is allowed to be any integer/float. Read and convert to @@ -89,9 +90,8 @@ struct ONNXOneHotOpLowering : public OpConversionPattern { Value onValueIndexVal = onValueIndex.getValue(); // Now we have the index that is on, iterate over the depth values // along axis, and set the right one to the value on. - ValueRange depthLoopDef = createKrnl.defineLoops(1); - createKrnl.iterateIE(depthLoopDef, depthLoopDef, {zeroIE}, {depth}, - [&](KrnlBuilder createBuilder, ValueRange depthLoopInd) { + createKrnl.forLoopIE(zeroIE, depth, /*step*/ 1, /*par*/ false, + [&](const KrnlBuilder createBuilder, ValueRange depthLoopInd) { MathBuilder createMath(createKrnl); Value onCond = createMath.eq(depthLoopInd[0], onValueIndexVal); Value res = createMath.select(onCond, onVal, offVal); diff --git a/src/Conversion/ONNXToKrnl/Tensor/Pad.cpp b/src/Conversion/ONNXToKrnl/Tensor/Pad.cpp index 1dec9ea6a5..1b3d83890c 100644 --- a/src/Conversion/ONNXToKrnl/Tensor/Pad.cpp +++ b/src/Conversion/ONNXToKrnl/Tensor/Pad.cpp @@ -98,7 +98,7 @@ struct ONNXPadOpLowering : public OpConversionPattern { create.krnlIE.getShapeAsDims(data, ubs); ValueRange mainLoopDef = create.krnl.defineLoops(rank); create.krnl.iterateIE(mainLoopDef, mainLoopDef, lbs, ubs, - [&](KrnlBuilder &createKrnl, ValueRange dataLoopInd) { + [&](const KrnlBuilder &createKrnl, ValueRange dataLoopInd) { SmallVector resLoopInd; for (uint64_t i = 0; i < rank; ++i) { IndexExpr resInd = DimIE(dataLoopInd[i]) + shapeHelper.pads[i]; @@ -116,7 +116,7 @@ struct ONNXPadOpLowering : public OpConversionPattern { // Iterate over the result tensor dimensions. 
ValueRange mainLoopDef = create.krnl.defineLoops(rank); create.krnl.iterateIE(mainLoopDef, mainLoopDef, lbs, ubs, - [&](KrnlBuilder &createKrnl, ValueRange resLoopInd) { + [&](const KrnlBuilder &createKrnl, ValueRange resLoopInd) { MultiDialectBuilder create( createKrnl); SmallVector dataLoopInd; diff --git a/src/Conversion/ONNXToKrnl/Tensor/Range.cpp b/src/Conversion/ONNXToKrnl/Tensor/Range.cpp index 52543593bb..8ddd91f99b 100644 --- a/src/Conversion/ONNXToKrnl/Tensor/Range.cpp +++ b/src/Conversion/ONNXToKrnl/Tensor/Range.cpp @@ -159,7 +159,7 @@ struct ONNXRangeOpLowering : public OpConversionPattern { SmallVector ubs; create.krnlIE.getShapeAsDims(alloc, ubs); create.krnl.iterateIE(loopDef, loopDef, {LitIE(0)}, ubs, - [&](KrnlBuilder &createKrnl, ValueRange loopInd) { + [&](const KrnlBuilder &createKrnl, ValueRange loopInd) { // Emit body of the loop: // output[i] = start + (i * delta); // Read value: diff --git a/src/Conversion/ONNXToKrnl/Tensor/Resize.cpp b/src/Conversion/ONNXToKrnl/Tensor/Resize.cpp index 6318279f5d..75bbed3531 100644 --- a/src/Conversion/ONNXToKrnl/Tensor/Resize.cpp +++ b/src/Conversion/ONNXToKrnl/Tensor/Resize.cpp @@ -121,8 +121,8 @@ struct ONNXResizeOpLowering : public OpConversionPattern { SmallVector lbs(rank, LitIE(0)); SmallVector ubs; create.krnlIE.getShapeAsDims(alloc, ubs); - create.krnl.iterateIE( - loopDef, loopDef, lbs, ubs, [&](KrnlBuilder &ck, ValueRange loopInd) { + create.krnl.iterateIE(loopDef, loopDef, lbs, ubs, + [&](const KrnlBuilder &ck, ValueRange loopInd) { MultiDialectBuilder create(ck); SmallVector readIndices; diff --git a/src/Conversion/ONNXToKrnl/Tensor/ReverseSequence.cpp b/src/Conversion/ONNXToKrnl/Tensor/ReverseSequence.cpp index 22517be281..d0a7f5d3ae 100644 --- a/src/Conversion/ONNXToKrnl/Tensor/ReverseSequence.cpp +++ b/src/Conversion/ONNXToKrnl/Tensor/ReverseSequence.cpp @@ -96,7 +96,7 @@ struct ONNXReverseSequenceOpLowering ValueRange loopDef = create.krnl.defineLoops(outputRank); SmallVector lbs(outputRank, LitIE(0)); create.krnl.iterateIE(loopDef, loopDef, lbs, shapeHelper.getOutputDims(), - [&](KrnlBuilder &createKrnl, ValueRange loopInd) { + [&](const KrnlBuilder &createKrnl, ValueRange loopInd) { IndexExprScope innerLoopScope(&rewriter, shapeHelper.getScope()); // compute the loop indices for the output diff --git a/src/Conversion/ONNXToKrnl/Tensor/ScatterElements.cpp b/src/Conversion/ONNXToKrnl/Tensor/ScatterElements.cpp index def26fd304..cdea57693d 100644 --- a/src/Conversion/ONNXToKrnl/Tensor/ScatterElements.cpp +++ b/src/Conversion/ONNXToKrnl/Tensor/ScatterElements.cpp @@ -77,7 +77,7 @@ struct ONNXScatterElementsOpLowering DimsExpr lbs(updatesRank, LitIE(0)), ubs; create.krnlIE.getShapeAsDims(updates, ubs); create.krnl.iterateIE(loopDef, loopDef, lbs, ubs, - [&](KrnlBuilder &createKrnl, ValueRange loopInd) { + [&](const KrnlBuilder &createKrnl, ValueRange loopInd) { // Insert code inside the loop. 
IndexExprScope innerLoopScope(createKrnl); diff --git a/src/Conversion/ONNXToKrnl/Tensor/ScatterND.cpp b/src/Conversion/ONNXToKrnl/Tensor/ScatterND.cpp index c877621d2e..2a585da52a 100644 --- a/src/Conversion/ONNXToKrnl/Tensor/ScatterND.cpp +++ b/src/Conversion/ONNXToKrnl/Tensor/ScatterND.cpp @@ -72,7 +72,7 @@ struct ONNXScatterNDOpLowering : public OpConversionPattern { DimsExpr lbs(updatesRank, LitIE(0)), ubs; create.krnlIE.getShapeAsDims(updates, ubs); create.krnl.iterateIE(loopDef, loopDef, lbs, ubs, - [&](KrnlBuilder &createKrnl, ValueRange loopInd) { + [&](const KrnlBuilder &createKrnl, ValueRange loopInd) { // Insert code inside the loop. IndexExprScope innerLoopScope(createKrnl); diff --git a/src/Conversion/ONNXToKrnl/Tensor/Slice.cpp b/src/Conversion/ONNXToKrnl/Tensor/Slice.cpp index c6797185a1..956637d553 100644 --- a/src/Conversion/ONNXToKrnl/Tensor/Slice.cpp +++ b/src/Conversion/ONNXToKrnl/Tensor/Slice.cpp @@ -49,7 +49,7 @@ struct ONNXSliceOpLowering : public OpConversionPattern { ValueRange loopDef = create.krnl.defineLoops(outputRank); SmallVector lbs(outputRank, LitIE(0)); create.krnl.iterateIE(loopDef, loopDef, lbs, shapeHelper.getOutputDims(), - [&](KrnlBuilder &createKrnl, ValueRange loopInd) { + [&](const KrnlBuilder &createKrnl, ValueRange loopInd) { IndexExprScope loopScope(createKrnl); // Compute indices for the load and store op. diff --git a/src/Conversion/ONNXToKrnl/Tensor/Split.cpp b/src/Conversion/ONNXToKrnl/Tensor/Split.cpp index e00e73ffe3..e6490ffd3f 100644 --- a/src/Conversion/ONNXToKrnl/Tensor/Split.cpp +++ b/src/Conversion/ONNXToKrnl/Tensor/Split.cpp @@ -69,7 +69,7 @@ LogicalResult ONNXSplitOpLoweringCommon(OP_TYPE splitOp, OP_ADAPTOR adaptor, SmallVector ubs; create.krnlIE.getShapeAsDims(allocs[i], ubs); create.krnl.iterateIE(loopDef, loopDef, lbs, ubs, - [&](KrnlBuilder &createKrnl, ValueRange indices) { + [&](const KrnlBuilder &createKrnl, ValueRange indices) { SmallVector readIndices; for (uint64_t r = 0; r < rank; ++r) { DimIndexExpr readIndex(indices[r]); diff --git a/src/Conversion/ONNXToKrnl/Tensor/Tile.cpp b/src/Conversion/ONNXToKrnl/Tensor/Tile.cpp index bc517b907b..f76cee05de 100644 --- a/src/Conversion/ONNXToKrnl/Tensor/Tile.cpp +++ b/src/Conversion/ONNXToKrnl/Tensor/Tile.cpp @@ -81,7 +81,7 @@ struct ONNXTileOpLowering : public OpConversionPattern { SmallVector lbs(outputRank, LitIE(0)); create.krnl.iterateIE(loopDef, loopDef, lbs, shapeHelper.getOutputDims(), - [&](KrnlBuilder &createKrnl, ValueRange indices) { + [&](const KrnlBuilder &createKrnl, ValueRange indices) { // Compute the indices used by the input tensor load operation. // Note: An alternative implementation can be found at the end of this // file. diff --git a/src/Conversion/ONNXToKrnl/Tensor/Transpose.cpp b/src/Conversion/ONNXToKrnl/Tensor/Transpose.cpp index 86a5321731..c597396179 100644 --- a/src/Conversion/ONNXToKrnl/Tensor/Transpose.cpp +++ b/src/Conversion/ONNXToKrnl/Tensor/Transpose.cpp @@ -161,7 +161,7 @@ struct ONNXTransposeOpLowering : public OpConversionPattern { } create->krnl.iterateIE(loopDef, loopDef, lbs, ubs, - [&](KrnlBuilder &createKrnl, ValueRange indices) { + [&](const KrnlBuilder &createKrnl, ValueRange indices) { // Compute the indices used by the load operation. 
SmallVector storeIndices; for (uint64_t i = 0; i < rank; ++i) { @@ -235,7 +235,7 @@ struct ONNXTransposeOpLowering : public OpConversionPattern { } } create->krnl.iterateIE(loopDef, loopDef, lbs, inUBs, - [&](KrnlBuilder &createKrnl, ValueRange indices) { + [&](const KrnlBuilder &createKrnl, ValueRange indices) { MultiDialectBuilder create(createKrnl); IndexExprScope loopScope(createKrnl); // Compute destination and source offsets for memcpy. diff --git a/src/Dialect/Krnl/DialectBuilder.cpp b/src/Dialect/Krnl/DialectBuilder.cpp index a645018de9..ae5054ca61 100644 --- a/src/Dialect/Krnl/DialectBuilder.cpp +++ b/src/Dialect/Krnl/DialectBuilder.cpp @@ -124,7 +124,7 @@ Value KrnlBuilder::vectorTypeCast(Value sourceMemref, int64_t vectorLen) const { } void KrnlBuilder::region( - function_ref bodyBuilderFn) const { + function_ref bodyBuilderFn) const { KrnlBuilder createKrnl(b(), loc()); KrnlRegionOp regionOp = b().create(loc()); { @@ -179,20 +179,19 @@ void KrnlBuilder::parallelClause( void KrnlBuilder::iterate(ValueRange originalLoops, ValueRange optimizedLoops, ValueRange lbs, ValueRange ubs, - function_ref + function_ref bodyBuilderFn) const { - auto bodyBuilderFnWrapper = [&](KrnlBuilder &createKrnl, ValueRange indices, - ValueRange iterArgs) { + auto bodyBuilderFnWrapper = [&](const KrnlBuilder &createKrnl, + ValueRange indices, ValueRange iterArgs) { bodyBuilderFn(createKrnl, indices); }; iterate(originalLoops, optimizedLoops, lbs, ubs, {}, bodyBuilderFnWrapper); } +// Deprecated KrnlIterateOp KrnlBuilder::iterate(ValueRange originalLoops, ValueRange optimizedLoops, ValueRange lbs, ValueRange ubs, ValueRange inits, - function_ref - bodyBuilderFn) const { + KrnlLoopBody2Fn bodyBuilderFn) const { // Check that originalLoops, lbs, and ubs have the same rank. assert(originalLoops.size() == lbs.size() && "expected same rank"); assert(originalLoops.size() == ubs.size() && "expected same rank"); @@ -213,21 +212,18 @@ KrnlIterateOp KrnlBuilder::iterate( void KrnlBuilder::iterateIE(ValueRange originalLoops, ValueRange optimizedLoops, ArrayRef lbs, ArrayRef ubs, - function_ref - bodyBuilderFn) const { - auto bodyBuilderFnWrapper = [&](KrnlBuilder &createKrnl, ValueRange indices, - ValueRange iterArgs) { + KrnlLoopBodyFn bodyBuilderFn) const { + auto bodyBuilderFnWrapper = [&](const KrnlBuilder &createKrnl, + ValueRange indices, ValueRange iterArgs) { bodyBuilderFn(createKrnl, indices); }; iterateIE(originalLoops, optimizedLoops, lbs, ubs, {}, bodyBuilderFnWrapper); } +// Deprecated. KrnlIterateOp KrnlBuilder::iterateIE(ValueRange originalLoops, ValueRange optimizedLoops, ArrayRef lbs, ArrayRef ubs, - ValueRange inits, - function_ref - bodyBuilderFn) const { + ValueRange inits, KrnlLoopBody2Fn bodyBuilderFn) const { // Check that originalLoops, lbs, and ubs have the same rank. 
assert(originalLoops.size() == lbs.size() && "expected same rank"); assert(originalLoops.size() == ubs.size() && "expected same rank"); diff --git a/src/Dialect/Krnl/DialectBuilder.hpp b/src/Dialect/Krnl/DialectBuilder.hpp index 53f92bdb9a..3a3c786aad 100644 --- a/src/Dialect/Krnl/DialectBuilder.hpp +++ b/src/Dialect/Krnl/DialectBuilder.hpp @@ -59,7 +59,8 @@ struct KrnlBuilder : public DialectBuilder { mlir::Value vectorTypeCast(mlir::Value sourceMemref, int64_t vectorLen) const; void region( - mlir::function_ref bodyBuilderFn) const; + mlir::function_ref bodyBuilderFn) + const; mlir::ValueRange defineLoops(int64_t originalLoopNum) const; mlir::ValueRange block(mlir::Value loop, int64_t blockSize) const; @@ -75,17 +76,19 @@ struct KrnlBuilder : public DialectBuilder { // function implement the body of the loop, and receive a KRNL builder and the // loop indices. using KrnlLoopBodyFn = - mlir::function_ref; + mlir::function_ref; + using KrnlLoopBody2Fn = mlir::function_ref; void iterate(mlir::ValueRange originalLoops, mlir::ValueRange optimizedLoops, mlir::ValueRange lbs, mlir::ValueRange ubs, KrnlLoopBodyFn bodyBuilderFn) const; + // Deprecated. mlir::KrnlIterateOp iterate(mlir::ValueRange originalLoops, mlir::ValueRange optimizedLoops, mlir::ValueRange lbs, mlir::ValueRange ubs, mlir::ValueRange inits, - mlir::function_ref - bodyBuilderFn) const; + KrnlLoopBody2Fn bodyBuilderFn) const; + mlir::KrnlIterateOp iterate( const krnl::KrnlIterateOperandPack &operands) const; @@ -93,12 +96,11 @@ struct KrnlBuilder : public DialectBuilder { void iterateIE(mlir::ValueRange originalLoops, mlir::ValueRange optimizedLoops, mlir::ArrayRef lbs, mlir::ArrayRef ubs, KrnlLoopBodyFn bodyBuilderFn) const; + // Deprecated. mlir::KrnlIterateOp iterateIE(mlir::ValueRange originalLoops, mlir::ValueRange optimizedLoops, mlir::ArrayRef lbs, mlir::ArrayRef ubs, mlir::ValueRange inits, - mlir::function_ref - bodyBuilderFn) const; + KrnlLoopBody2Fn bodyBuilderFn) const; // Common loop interface (krnl/affine/scf). void forLoopIE(IndexExpr lb, IndexExpr ub, int64_t step, bool useParallel, diff --git a/src/Dialect/Mlir/DialectBuilder.cpp b/src/Dialect/Mlir/DialectBuilder.cpp index 89ae3c5818..1075a25203 100644 --- a/src/Dialect/Mlir/DialectBuilder.cpp +++ b/src/Dialect/Mlir/DialectBuilder.cpp @@ -1697,9 +1697,8 @@ void MemRefBuilder::prefetchIE(Value memref, ArrayRef indices, // Structured Control Flow (SCF). //===----------------------------------------------------------------------===// -void SCFBuilder::ifThenElse(Value cond, - function_ref thenFn, - function_ref elseFn) const { +void SCFBuilder::ifThenElse( + Value cond, SCFThenElseBodyFn thenFn, SCFThenElseBodyFn elseFn) const { if (!elseFn) { b().create(loc(), cond, /* then */ diff --git a/src/Dialect/Mlir/DialectBuilder.hpp b/src/Dialect/Mlir/DialectBuilder.hpp index f247f6c9fe..32fc82e42c 100644 --- a/src/Dialect/Mlir/DialectBuilder.hpp +++ b/src/Dialect/Mlir/DialectBuilder.hpp @@ -466,14 +466,14 @@ struct SCFBuilder final : DialectBuilder { /// Create an if then with optional else. Construct does not generate a /// result (unlike some scf::if) and introduces the yields automatically. - void ifThenElse(mlir::Value cond, - mlir::function_ref thenFn, - mlir::function_ref elseFn = nullptr) const; + using SCFThenElseBodyFn = mlir::function_ref; + void ifThenElse(mlir::Value cond, SCFThenElseBodyFn thenFn, + SCFThenElseBodyFn elseFn = nullptr) const; // Common loop interface (krnl/affine/scf). 
using SCFLoopBodyFn = - mlir::function_ref; + mlir::function_ref; void forLoopIE(IndexExpr lb, IndexExpr ub, int64_t step, bool useParallel, - mlir::function_ref bodyFn) const; + SCFLoopBodyFn bodyFn) const; // Custom interface void forLoop( mlir::Value lb, mlir::Value ub, int64_t step, SCFLoopBodyFn bodyFn) const; @@ -614,7 +614,7 @@ struct GenericAffineBuilder final : DialectBuilder { // Common loop interface (krnl/affine/scf). using GenericAffineLoopBodyFn = - mlir::function_ref; + mlir::function_ref; void forLoopIE(IndexExpr lb, IndexExpr ub, int64_t step, bool useParallel, GenericAffineLoopBodyFn builderFn) const; @@ -656,10 +656,11 @@ struct GenericAffineBuilder final : DialectBuilder { GenericAffineSimdPostReductionBodyFn postReductionBodyFn) const; // This if then else construct has no arguments to the blocks. + using GenericAffineThenElseBodyFn = + mlir::function_ref &)>; void ifThenElseIE(IndexExprScope &scope, mlir::ArrayRef conditions, - mlir::function_ref thenFn, - mlir::function_ref elseFn) - const; + GenericAffineThenElseBodyFn thenFn, + GenericAffineThenElseBodyFn elseFn) const; // AffineApplyOp mlir::Value apply(mlir::AffineMap map, mlir::ValueRange operands) const; @@ -671,8 +672,7 @@ struct GenericAffineBuilder final : DialectBuilder { void recursionForLoopsIE(mlir::ArrayRef lbs, mlir::ArrayRef ubs, mlir::ArrayRef steps, llvm::SmallVectorImpl &loopIndices, - mlir::function_ref - builderFn) const; + GenericAffineLoopBodyFn builderFn) const; // Support for adding blocks. void appendToBlock(mlir::Block *block, @@ -691,8 +691,9 @@ using AffineBuilder = GenericAffineBuilder; - using valueFuncRef = mlir::function_ref; + using voidFuncRef = mlir::function_ref; + using valueFuncRef = + mlir::function_ref; LLVMBuilder(mlir::Location loc) : DialectBuilder(loc) {} LLVMBuilder(mlir::OpBuilder &b, mlir::Location loc) diff --git a/src/Dialect/Mlir/DialectBuilder.hpp.inc b/src/Dialect/Mlir/DialectBuilder.hpp.inc index 6e84be9931..43e8fa4534 100644 --- a/src/Dialect/Mlir/DialectBuilder.hpp.inc +++ b/src/Dialect/Mlir/DialectBuilder.hpp.inc @@ -145,7 +145,7 @@ krnl.iterate(loop i from 0 to 256) { In our example, the kernel is as follows - [&](KrnlBuilder &kb, ArrayRef inputVals, int64_t VL) { + [&](const KrnlBuilder &kb, ArrayRef inputVals, int64_t VL) { MultiDialectBuilder create(kb); Value aVal = inputVals[0]; // simd or scalar Value bVal = inputVals[1]; // simd or scalar @@ -192,7 +192,7 @@ void simdIterateIE(const BUILDER &builder, IndexExpr lb, IndexExpr ub, simdUb = simdUb - (VL - 1); // Define the loop block - auto simdLoopBody = [&](BUILDER b, mlir::ValueRange loopInd) { + auto simdLoopBody = [&](const BUILDER b, mlir::ValueRange loopInd) { IndexExprScope scope(b); VectorBuilder createVec(b); MEM_BUILDER createMem(b); @@ -260,7 +260,7 @@ void simdIterateIE(const BUILDER &builder, IndexExpr lb, IndexExpr ub, lb = lb + completedIters; } // Handle remaining scalar values (from lb to ub without unrolling). - auto scalarLoopBody = [&](BUILDER b, mlir::ValueRange loopInd) { + auto scalarLoopBody = [&](const BUILDER b, mlir::ValueRange loopInd) { IndexExprScope scope(b); MEM_BUILDER createMem(b); @@ -376,7 +376,7 @@ void simdReduceIE(const BUILDER &builder, IndexExpr lb, IndexExpr ub, if (!fullySimd) simdUb = simdUb - (VL - 1); - auto simdLoopBody = [&](BUILDER &b, mlir::ValueRange loopInd) { + auto simdLoopBody = [&](const BUILDER &b, mlir::ValueRange loopInd) { IndexExprScope scope(b); MultiDialectBuilder create(b); // Load inputs in SIMD mode, indexed by loopInd[0] in innermost dim. 
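For the iterate and iterateIE overloads deprecated in the DialectBuilder hunks above, the split is by whether the loop carries values across iterations. A sketch of both call shapes, with hypothetical `loops`, bounds `lbs`/`ubs`, and initial value `init`:

// Common case: no loop-carried values; two-argument callback.
create.krnl.iterate(loops, loops, lbs, ubs,
    [&](const KrnlBuilder &ck, ValueRange indices) { /* body */ });
// Deprecated form: only needed when iteration arguments are threaded,
// as in the Conv lowering at the top of this patch (iterArgs.back()).
create.krnl.iterate(loops, loops, lbs, ubs, /*inits*/ {init},
    [&](const KrnlBuilder &ck, ValueRange indices, ValueRange iterArgs) {
      Value acc = iterArgs.back(); // loop-carried accumulator
      /* ... combine acc with this iteration's contribution ... */
    });

As the wrapper in DialectBuilder.cpp shows, the simple form forwards to the inits-taking form with an empty iterArgs list, so both ultimately build the same KrnlIterateOp.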
@@ -440,7 +440,7 @@ void simdReduceIE(const BUILDER &builder, IndexExpr lb, IndexExpr ub, // We have leftover iterations to be done in sequential mode. // Handle remaining scalar values (from lb to ub without unrolling). - auto scalarLoopBody = [&](BUILDER &b, mlir::ValueRange loopInd) { + auto scalarLoopBody = [&](const BUILDER &b, mlir::ValueRange loopInd) { IndexExprScope scope(b); MEM_BUILDER createMem(b); IndexExpr ind = DimIE(loopInd[0]); @@ -719,8 +719,8 @@ inline void GenericAffineBuilder::simdReduce2DIE( template inline void GenericAffineBuilder::ifThenElseIE( IndexExprScope &scope, mlir::ArrayRef conditions, - mlir::function_ref thenFn, - mlir::function_ref elseFn) const { + GenericAffineThenElseBodyFn thenFn, + GenericAffineThenElseBodyFn elseFn) const { int64_t rank = conditions.size(); llvm::SmallVector affineCond; bool allTrue = true; @@ -777,13 +777,13 @@ void GenericAffineBuilder::recursionForLoopsIE( mlir::ArrayRef lbs, mlir::ArrayRef ubs, mlir::ArrayRef steps, llvm::SmallVectorImpl &loopIndices, - mlir::function_ref - builderFn) const { + GenericAffineLoopBodyFn builderFn) const { int d = loopIndices.size(); if (d < (int)lbs.size()) { // Issue a loop and recurse again. forLoopIE(lbs[d], ubs[d], steps[d], - [&](GenericAffineBuilder &createAffine, mlir::ValueRange loopInd) { + [&](const GenericAffineBuilder &createAffine, + mlir::ValueRange loopInd) { loopIndices.emplace_back(loopInd[0]); recursionForLoopsIE(lbs, ubs, steps, loopIndices, builderFn); }); diff --git a/src/Dialect/Mlir/IndexExpr.cpp b/src/Dialect/Mlir/IndexExpr.cpp index e478bc9e10..65ac1f30ce 100644 --- a/src/Dialect/Mlir/IndexExpr.cpp +++ b/src/Dialect/Mlir/IndexExpr.cpp @@ -49,7 +49,7 @@ IndexExprScope::IndexExprScope(OpBuilder *rewriter, Location loc) getCurrentScopePtr() = this; } -IndexExprScope::IndexExprScope(DialectBuilder &db) +IndexExprScope::IndexExprScope(const DialectBuilder &db) : IndexExprScope(&db.getBuilder(), db.getLoc()) {} // Nested scopes. @@ -73,7 +73,7 @@ IndexExprScope::IndexExprScope( } IndexExprScope::IndexExprScope( - DialectBuilder &innerDb, IndexExprScope *enclosingScope) + const DialectBuilder &innerDb, IndexExprScope *enclosingScope) : IndexExprScope(&innerDb.getBuilder(), enclosingScope) {} IndexExprScope::~IndexExprScope() { diff --git a/src/Dialect/Mlir/IndexExpr.hpp b/src/Dialect/Mlir/IndexExpr.hpp index a925402791..e1cd247cb7 100644 --- a/src/Dialect/Mlir/IndexExpr.hpp +++ b/src/Dialect/Mlir/IndexExpr.hpp @@ -337,12 +337,12 @@ class IndexExprScope { // Constructor for a scope. Top level scope must provide rewriter (possibly // null if we cannot generate code at this time) and location. IndexExprScope(mlir::OpBuilder *rewriter, mlir::Location loc); - IndexExprScope(DialectBuilder &db); + IndexExprScope(const DialectBuilder &db); // Constructor for subsequent nested scopes. Providing enclosing scope is // technically not necessary (nullptr can be passed); it is used to allow a // user to explicitly name the enclosing scope. IndexExprScope(mlir::OpBuilder *rewriter, IndexExprScope *enclosingScope); - IndexExprScope(DialectBuilder &db, IndexExprScope *enclosingScope); + IndexExprScope(const DialectBuilder &db, IndexExprScope *enclosingScope); // Destructor which releases all IndexExpr associated with this scope. virtual ~IndexExprScope();
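The const overloads added to IndexExprScope at the end are what let the rewritten loop bodies open scopes directly from the const builder they receive. A minimal sketch combining the pieces, again with hypothetical bounds `lbIE` and `ubIE`:

create.krnl.forLoopIE(lbIE, ubIE, /*step*/ 1, /*parallel*/ false,
    [&](const KrnlBuilder &ck, ValueRange loopInd) {
      IndexExprScope innerScope(ck); // const DialectBuilder overload
      DimIndexExpr i(loopInd[0]);    // index exprs attach to this scope
      /* ... emit the body using i ... */
    });

The scope still obtains the underlying OpBuilder and Location through the builder, so constness here only promises that constructing the scope does not alter the builder itself.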