diff --git a/lib/Dialect/TensorExt/IR/TensorExtAttributes.td b/lib/Dialect/TensorExt/IR/TensorExtAttributes.td index 9bac27eac..879c7b37d 100644 --- a/lib/Dialect/TensorExt/IR/TensorExtAttributes.td +++ b/lib/Dialect/TensorExt/IR/TensorExtAttributes.td @@ -64,4 +64,20 @@ def SIMDPacking_Attr : TensorExt_Attr<"SIMDPacking", "simd_packing", let assemblyFormat = "`<` struct(params) `>`"; } +def TensorExt_LayoutAttr : TensorExt_Attr<"Layout", "layout"> { + let summary = "Attribute denoting the layout of a tensor in a set of ciphertexts"; + let description = [{ + This attribute contains an affine map that describes the layout of a tensor + in a set of ciphertexts. The affine map is a function that maps tensor indices + to ciphertext indices (possibly with a ciphertext-selecting index). + + This attribute exists primarily to provide a "dialect attribute" which is + required to annotate the arguments of `func.func` arguments. + }]; + + let parameters = (ins "AffineMap": $layout); + let assemblyFormat = "`<` struct(params) `>`"; +} + + #endif // LIB_DIALECT_TENSOREXT_IR_TENSOREXTATTRIBUTES_TD_ diff --git a/lib/Dialect/TensorExt/IR/TensorExtDialect.td b/lib/Dialect/TensorExt/IR/TensorExtDialect.td index 12c44afec..0815f2785 100644 --- a/lib/Dialect/TensorExt/IR/TensorExtDialect.td +++ b/lib/Dialect/TensorExt/IR/TensorExtDialect.td @@ -17,6 +17,12 @@ def TensorExt_Dialect : Dialect { "tensor::TensorDialect", ]; + let extraClassDeclaration = [{ + constexpr const static ::llvm::StringLiteral + kLayoutAttrName = "tensor_ext.layout"; + }]; + + let useDefaultAttributePrinterParser = 1; } diff --git a/lib/Dialect/TensorExt/IR/TensorExtOps.cpp b/lib/Dialect/TensorExt/IR/TensorExtOps.cpp index a3ea13e15..fb5dd5cf8 100644 --- a/lib/Dialect/TensorExt/IR/TensorExtOps.cpp +++ b/lib/Dialect/TensorExt/IR/TensorExtOps.cpp @@ -37,28 +37,44 @@ LogicalResult RotateOp::verify() { return success(); } -LogicalResult ConvertLayoutOp::verify() { - int64_t rank = 
cast(getTensor().getType()).getRank(); - const AffineMap &fromLayout = getFromLayout().getValue(); - const AffineMap &toLayout = getToLayout().getValue(); - - if (rank != fromLayout.getNumDims() || rank != toLayout.getNumDims()) { - std::string fromLayoutStr, toLayoutStr; - llvm::raw_string_ostream fromLayoutStream(fromLayoutStr), - toLayoutStream(toLayoutStr); - fromLayout.print(fromLayoutStream); - toLayout.print(toLayoutStream); +LogicalResult verifyLayoutMatchesType(const AffineMap &layout, Type type, + Operation *op) { + int64_t rank = cast(type).getRank(); + if (rank != layout.getNumDims()) { + std::string layoutStr; + llvm::raw_string_ostream os(layoutStr); + layout.print(os); - return emitOpError() + return op->emitOpError() << "requires tensor rank to match the layout map's dimension count" - "but found rank " - << rank << " and maps " << fromLayoutStream.str() << " and " - << toLayoutStream.str(); + " but found rank " + << rank << " and map " << os.str(); + } + + return success(); +} + +LogicalResult ConvertLayoutOp::verify() { + LogicalResult inputVerification = verifyLayoutMatchesType( + getFromLayout().getValue(), getTensor().getType(), *this); + if (failed(inputVerification)) { + return inputVerification; + } + + LogicalResult outputVerification = verifyLayoutMatchesType( + getToLayout().getValue(), getResult().getType(), *this); + if (failed(outputVerification)) { + return outputVerification; } return success(); } +LogicalResult AssignLayoutOp::verify() { + return verifyLayoutMatchesType(getLayout().getValue(), getTensor().getType(), + *this); +} + } // namespace tensor_ext } // namespace heir } // namespace mlir diff --git a/lib/Dialect/TensorExt/IR/TensorExtOps.td b/lib/Dialect/TensorExt/IR/TensorExtOps.td index 80d4a8f1d..f42033518 100644 --- a/lib/Dialect/TensorExt/IR/TensorExtOps.td +++ b/lib/Dialect/TensorExt/IR/TensorExtOps.td @@ -58,10 +58,31 @@ def TensorExt_ConvertLayoutOp : TensorExt_Op<"convert_layout", [Pure, AllTypesMa This op is 
inserted by layout selection passes. }]; + let assemblyFormat = "operands attr-dict `:` type($output)"; let arguments = (ins AnyRankedTensor:$tensor, Builtin_AffineMapAttr:$from_layout, Builtin_AffineMapAttr:$to_layout); let results = (outs AnyRankedTensor:$output); let hasVerifier = 1; } +def TensorExt_AssignLayoutOp : TensorExt_Op<"assign_layout", [Pure, AllTypesMatch<["tensor", "output"]>]> { + let summary = "Assign a layout to a plaintext tensor."; + let description = [{ + This op allows the ingestion of a plaintext tensor into the layout system. + For example, ops like `linalg.reduce`, require a tensor input to represent + initial values. These will generally be created by an `arith.constant` or + `tensor.empty` op, which does not have secret results. Lowerings will + convert this to a packed plaintext, so that the subsequent ops can be + lowered as ciphertext-plaintext ops. + + This op is inserted by layout selection passes. + }]; + + let assemblyFormat = "operands attr-dict `:` type($output)"; + let arguments = (ins AnyRankedTensor:$tensor, Builtin_AffineMapAttr:$layout); + let results = (outs AnyRankedTensor:$output); + let hasVerifier = 1; +} + + #endif // LIB_DIALECT_TENSOREXT_IR_TENSOREXTOPS_TD_ diff --git a/lib/Pipelines/ArithmeticPipelineRegistration.cpp b/lib/Pipelines/ArithmeticPipelineRegistration.cpp index a109ecc64..92b944968 100644 --- a/lib/Pipelines/ArithmeticPipelineRegistration.cpp +++ b/lib/Pipelines/ArithmeticPipelineRegistration.cpp @@ -20,7 +20,10 @@ #include "lib/Dialect/TensorExt/Transforms/RotateAndReduce.h" #include "lib/Pipelines/PipelineRegistration.h" #include "lib/Transforms/ApplyFolders/ApplyFolders.h" +#include "lib/Transforms/DropUnitDims/DropUnitDims.h" +#include "lib/Transforms/ForwardStoreToLoad/ForwardStoreToLoad.h" #include "lib/Transforms/FullLoopUnroll/FullLoopUnroll.h" +#include "lib/Transforms/LayoutPropagation/LayoutPropagation.h" #include "lib/Transforms/LinalgCanonicalizations/LinalgCanonicalizations.h" #include 
"lib/Transforms/OperationBalancer/OperationBalancer.h" #include "lib/Transforms/OptimizeRelinearization/OptimizeRelinearization.h" @@ -82,8 +85,15 @@ void mlirToSecretArithmeticPipelineBuilder(OpPassManager &pm) { pm.addPass(createCanonicalizerPass()); pm.addPass(createCSEPass()); - // Apply linalg kernels + // Linalg canonicalization + // TODO(#1191): enable dropping unit dims to convert matmul to matvec/vecmat + // pm.addPass(createDropUnitDims()); pm.addPass(createLinalgCanonicalizations()); + + // Layout assignment and lowering + // TODO(#1191): enable layout propagation after implementing the rest + // of the layout lowering pipeline. + // pm.addPass(createLayoutPropagation()); pm.addPass(heir::linalg::createLinalgToTensorExt()); // Vectorize and optimize rotations diff --git a/lib/Pipelines/BUILD b/lib/Pipelines/BUILD index 28c1f9eaa..9a59ab218 100644 --- a/lib/Pipelines/BUILD +++ b/lib/Pipelines/BUILD @@ -104,8 +104,10 @@ cc_library( "@heir//lib/Dialect/TensorExt/Transforms:InsertRotate", "@heir//lib/Dialect/TensorExt/Transforms:RotateAndReduce", "@heir//lib/Transforms/ApplyFolders", + "@heir//lib/Transforms/DropUnitDims", "@heir//lib/Transforms/ForwardStoreToLoad", "@heir//lib/Transforms/FullLoopUnroll", + "@heir//lib/Transforms/LayoutPropagation", "@heir//lib/Transforms/LinalgCanonicalizations", "@heir//lib/Transforms/MemrefToArith:ExpandCopy", "@heir//lib/Transforms/MemrefToArith:MemrefToArithRegistration", diff --git a/lib/Transforms/LayoutPropagation/BUILD b/lib/Transforms/LayoutPropagation/BUILD new file mode 100644 index 000000000..da126a384 --- /dev/null +++ b/lib/Transforms/LayoutPropagation/BUILD @@ -0,0 +1,32 @@ +load("@heir//lib/Transforms:transforms.bzl", "add_heir_transforms") + +package( + default_applicable_licenses = ["@heir//:license"], + default_visibility = ["//visibility:public"], +) + +cc_library( + name = "LayoutPropagation", + srcs = ["LayoutPropagation.cpp"], + hdrs = ["LayoutPropagation.h"], + deps = [ + ":pass_inc_gen", + 
"@heir//lib/Analysis/SecretnessAnalysis", + "@heir//lib/Dialect/Secret/IR:Dialect", + "@heir//lib/Dialect/TensorExt/IR:Dialect", + "@llvm-project//mlir:ArithDialect", + "@llvm-project//mlir:FuncDialect", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:LinalgDialect", + "@llvm-project//mlir:Pass", + "@llvm-project//mlir:Support", + "@llvm-project//mlir:TensorDialect", + "@llvm-project//mlir:Transforms", + ], +) + +add_heir_transforms( + generated_target_name = "pass_inc_gen", + pass_name = "LayoutPropagation", + td_file = "LayoutPropagation.td", +) diff --git a/lib/Transforms/LayoutPropagation/LayoutPropagation.cpp b/lib/Transforms/LayoutPropagation/LayoutPropagation.cpp new file mode 100644 index 000000000..b194576e5 --- /dev/null +++ b/lib/Transforms/LayoutPropagation/LayoutPropagation.cpp @@ -0,0 +1,684 @@ +#include "lib/Transforms/LayoutPropagation/LayoutPropagation.h" + +#include "lib/Analysis/SecretnessAnalysis/SecretnessAnalysis.h" +#include "lib/Dialect/Secret/IR/SecretOps.h" +#include "lib/Dialect/Secret/IR/SecretTypes.h" +#include "lib/Dialect/TensorExt/IR/TensorExtAttributes.h" +#include "lib/Dialect/TensorExt/IR/TensorExtOps.h" +#include "llvm/include/llvm/ADT/TypeSwitch.h" // from @llvm-project +#include "llvm/include/llvm/Support/Debug.h" // from @llvm-project +#include "mlir/include/mlir/Analysis/DataFlow/ConstantPropagationAnalysis.h" // from @llvm-project +#include "mlir/include/mlir/Analysis/DataFlow/DeadCodeAnalysis.h" // from @llvm-project +#include "mlir/include/mlir/Analysis/DataFlowFramework.h" // from @llvm-project +#include "mlir/include/mlir/Dialect/Linalg/IR/Linalg.h" // from @llvm-project +#include "mlir/include/mlir/Dialect/Linalg/IR/LinalgInterfaces.h" // from @llvm-project +#include "mlir/include/mlir/IR/AffineMap.h" // from @llvm-project + +#define DEBUG_TYPE "layout-propagation" + +namespace mlir { +namespace heir { + +using linalg::ReduceOp; +using linalg::VecmatOp; +using ::mlir::arith::AddIOp; +using ::mlir::arith::ConstantOp; 
+using ::mlir::arith::MulIOp; +using secret::GenericOp; +using secret::SecretType; +using secret::YieldOp; +using tensor::CollapseShapeOp; +using tensor::EmptyOp; +using tensor::ExpandShapeOp; +using tensor_ext::AssignLayoutOp; +using tensor_ext::ConvertLayoutOp; +using tensor_ext::LayoutAttr; + +#define GEN_PASS_DEF_LAYOUTPROPAGATION +#include "lib/Transforms/LayoutPropagation/LayoutPropagation.h.inc" + +// The result of a compatibility check for the layouts of an op's operands (cf. +// hasCompatibleArgumentLayouts). If the check fails, the presence of a +// diagnostic signals that the failure is unrecoverable and should cause the +// pass to fail. If the diagnostic is nullopt, then the failure can be +// recovered by rectifyIncompatibleOperandLayouts. +struct CompatibilityResult { + bool compatible; + std::optional diag; +}; + +struct LayoutPropagation : impl::LayoutPropagationBase { + using LayoutPropagationBase::LayoutPropagationBase; + + // Top level visit method handles common logic for all ops, e.g., inserting + // conversions. + LogicalResult visitOperation(Operation *op); + + // Op-specific transfer functions + LogicalResult visitOperation(AddIOp op); + LogicalResult visitOperation(CollapseShapeOp op); + LogicalResult visitOperation(ConstantOp op); + LogicalResult visitOperation(EmptyOp op); + LogicalResult visitOperation(ExpandShapeOp op); + LogicalResult visitOperation(GenericOp op); + LogicalResult visitOperation(MulIOp op); + LogicalResult visitOperation(ReduceOp op); + LogicalResult visitOperation(VecmatOp op); + LogicalResult visitOperation(YieldOp op); + LogicalResult visitOperation(func::FuncOp op); + LogicalResult visitOperation(func::ReturnOp op); + + // Return true if the operand layouts are compatible for the operation, and + // false if not. Include an InFlightDiagnostic if an operand is encountered + // that requires a layout, but none has been set. 
+ CompatibilityResult hasCompatibleArgumentLayouts(Operation *op); + + // Op-specific compatibility functions + CompatibilityResult hasCompatibleArgumentLayouts(ReduceOp op); + CompatibilityResult hasCompatibleArgumentLayouts(VecmatOp op); + + // Insert conversion ops to rectify incompatible operand layouts + void rectifyIncompatibleOperandLayouts(Operation *op); + + // Op-specific overrides + void rectifyIncompatibleOperandLayouts(ReduceOp op); + + // Return the default layout for a given type + FailureOr defaultLayoutForType(Type type); + + // Helper to pass layouts through generic ops + void passLayoutThroughOp(Operation *op); + + // Add an op attribute denoting the layouts of the op results. Assumes the + // assignedLayouts map contains the layout for the result SSA values already. + void setResultLayoutAttr(Operation *op); + + void runOnOperation() override; + + DenseMap assignedLayouts; + DataFlowSolver *solver; +}; + +void visitDebugInfo(Operation *op) { + LLVM_DEBUG(llvm::dbgs() << "Visiting: " << op->getName() << "\n"); +} + +void debugAssignLayout(Value value, AffineMap layout) { + LLVM_DEBUG(llvm::dbgs() << "Assigning layout " << layout << " to value " + << value << "\n"); +} + +LogicalResult LayoutPropagation::visitOperation(Operation *op) { + visitDebugInfo(op); + + if (!isa(op) && + !isSecret(op->getOperands(), solver) && + !isSecret(op->getResults(), solver)) { + LLVM_DEBUG(llvm::dbgs() + << "Skipping op " << op->getName() + << " because its operands and results are non-secret, or it is " + "in a special allowlist of ops to ignore\n"); + return success(); + } + + // If an operand has no layout, it may for example be produced as a plaintext + // constant, such as a zero-valued tensor for the initializer of a reduction. + // In this case, we insert a layout assignment. 
+ for (auto operand : op->getOperands()) { + if (!assignedLayouts.contains(operand)) { + if (isa(operand.getType())) { + LLVM_DEBUG(llvm::dbgs() << "tensor operand " << operand + << " has no layout assigned\n"); + FailureOr layout = defaultLayoutForType(operand.getType()); + if (failed(layout)) { + return failure(); + } + mlir::IRRewriter builder(&getContext()); + builder.setInsertionPoint(op); + AssignLayoutOp assignLayoutOp = builder.create( + op->getLoc(), operand, AffineMapAttr::get(layout.value())); + Value toReplace = assignLayoutOp.getResult(); + builder.replaceAllUsesExcept(operand, toReplace, assignLayoutOp); + debugAssignLayout(toReplace, layout.value()); + assignedLayouts.insert({toReplace, layout.value()}); + } + } + } + + auto [compatible, diag] = hasCompatibleArgumentLayouts(op); + if (!compatible) { + if (diag.has_value()) { + // An InFlightDiagnostic casts to a failure() + return diag.value(); + } + rectifyIncompatibleOperandLayouts(op); + } + + return TypeSwitch(op) + // func ops + .Case( + [&](auto op) { return visitOperation(op); }) + // arith ops + .Case( + [&](auto op) { return visitOperation(op); }) + // secret ops + .Case([&](auto op) { return visitOperation(op); }) + // linalg ops + .Case([&](auto op) { return visitOperation(op); }) + // tensor ops + .Case( + [&](auto op) { return visitOperation(op); }) + .Default([&](Operation *op) { return success(); }); +} + +CompatibilityResult LayoutPropagation::hasCompatibleArgumentLayouts( + Operation *op) { + return TypeSwitch(op) + // Trivially true ops + .Case( + [&](auto op) { return CompatibilityResult{true, std::nullopt}; }) + // Ops with special rules + .Case( + [&](auto op) { return hasCompatibleArgumentLayouts(op); }) + // By default, assume operands must all have the same layout. 
+ .Default([&](Operation *op) { + std::optional firstFoundLayout; + + for (auto &operand : op->getOpOperands()) { + if (isa(operand.get().getType())) { + if (!assignedLayouts.contains(operand.get())) { + // If the operand has no layout, we can't propagate layout + // information to the result. + return CompatibilityResult{ + false, op->emitError("operand has no assigned layout")}; + } + AffineMap layout = assignedLayouts.at(operand.get()); + + if (!firstFoundLayout.has_value()) firstFoundLayout = layout; + if (layout != firstFoundLayout.value()) { + return CompatibilityResult{false, std::nullopt}; + } + } + } + + return CompatibilityResult{true, std::nullopt}; + }); +} + +// A helper to convert the layout of an input tensor to a reduce op. The result +// layout is equivalent to reducing the summed dimensions to 1 and then +// dropping them. +// +// TODO(1352): Determine if/how to support repetition in the layout. +AffineMap convertLayoutForReduce(AffineMap inputLayout, + ArrayRef dimsToReduce) { + unsigned numDims = inputLayout.getNumDims(); + llvm::SmallBitVector dimsBV(numDims, false); + for (int dimToSum : dimsToReduce) dimsBV.set(dimToSum); + return projectDims(inputLayout, dimsBV, /*compressDims=*/true); +} + +CompatibilityResult LayoutPropagation::hasCompatibleArgumentLayouts( + ReduceOp op) { + // The arguments of a ReduceOp are the tensor(s) to reduce and the + // initializer values for the reduction. 
+ for (const auto &[input, init] : llvm::zip(op.getInputs(), op.getInits())) { + if (!assignedLayouts.contains(input)) { + return {false, op->emitError("input tensor has no assigned layout")}; + } + if (!assignedLayouts.contains(init)) { + return {false, + op->emitError("initializer tensor has no assigned layout")}; + } + + AffineMap inputLayout = assignedLayouts.at(input); + AffineMap initLayout = assignedLayouts.at(init); + AffineMap reducedInputLayout = + convertLayoutForReduce(inputLayout, op.getDimensions()); + + if (reducedInputLayout != initLayout) { + return {false, std::nullopt}; + } + } + + return {true, std::nullopt}; +} + +CompatibilityResult LayoutPropagation::hasCompatibleArgumentLayouts( + VecmatOp op) { + // Currently only support secret vectors and plaintext matrices. + linalg::ContractionOpInterface vecmatOp = + cast(op.getOperation()); + Value vec = vecmatOp.lhs(); + Value mat = vecmatOp.rhs(); + if (isSecret(mat, solver) || !isSecret(vec, solver)) { + return {false, + op->emitError("Only secret vectors and plaintext matrices are " + "supported for linalg.vecmat")}; + } + + if (!assignedLayouts.contains(vec)) { + return {false, op->emitError("vector operand has no assigned layout")}; + } + return {true, std::nullopt}; +} + +void LayoutPropagation::rectifyIncompatibleOperandLayouts(Operation *op) { + LLVM_DEBUG({ + auto diag = op->emitRemark() << "Inserting layout conversion op due to " + "disagreeing operand layouts"; + auto ¬e = diag.attachNote(); + for (auto operand : op->getOperands()) { + std::string mapStr; + llvm::raw_string_ostream os(mapStr); + AffineMap operandLayout; + if (assignedLayouts.contains(operand)) + operandLayout = assignedLayouts.at(operand); + operandLayout.print(os); + note << "\n- Operand: " << operand << "; Layout: " << os.str(); + } + }); + + TypeSwitch(op) + // Ops with special rules + .Case( + [&](auto op) { return rectifyIncompatibleOperandLayouts(op); }) + .Default([&](Operation *op) { + // Default target layout is 
chosen arbitrarily as the first operand's + // layout for now. A different pass is responsible for optimizing the + // placement and mechanics of the layout conversion ops. + mlir::IRRewriter builder(&getContext()); + const auto it = llvm::find_if(op->getOperands(), [this](Value pair) { + return assignedLayouts.contains(pair); + }); + AffineMap targetLayout = assignedLayouts.at(*it); + + for (auto &opOperand : op->getOpOperands()) { + if (!assignedLayouts.contains(opOperand.get())) continue; + AffineMap sourceLayout = assignedLayouts.at(opOperand.get()); + + if (sourceLayout != targetLayout) { + builder.setInsertionPoint(op); + ConvertLayoutOp convertOp = builder.create( + op->getLoc(), opOperand.get(), AffineMapAttr::get(sourceLayout), + AffineMapAttr::get(targetLayout)); + + // Layout of the result is the same as the target layout of the + // conversion. Mostly this is done for consistency: all ops have an + // attribute describing the layout of their results. + OpBuilder builder(&getContext()); + assignedLayouts.insert({convertOp.getResult(), targetLayout}); + setResultLayoutAttr(convertOp); + op->setOperand(opOperand.getOperandNumber(), convertOp.getResult()); + } + } + }); +} + +void LayoutPropagation::rectifyIncompatibleOperandLayouts(ReduceOp op) { + mlir::IRRewriter builder(&getContext()); + builder.setInsertionPoint(op); + + for (const auto &[input, init] : llvm::zip(op.getInputs(), op.getInits())) { + AffineMap inputLayout = assignedLayouts.at(input); + AffineMap initLayout = assignedLayouts.at(init); + AffineMap reducedInputLayout = + convertLayoutForReduce(inputLayout, op.getDimensions()); + + if (reducedInputLayout != initLayout) { + ConvertLayoutOp convertOp = builder.create( + op->getLoc(), init, AffineMapAttr::get(initLayout), + AffineMapAttr::get(reducedInputLayout)); + Value toReplace = convertOp.getResult(); + // I'd like to just call op.setOperand(i, toReplace) here, but I can't + // figure out how the i'th entry in `getInits` corresponds to the 
general + // OpOperand index. + builder.replaceUsesWithIf(init, toReplace, [&](OpOperand &operand) { + return operand.getOwner() == op; + }); + assignedLayouts.insert({toReplace, reducedInputLayout}); + setResultLayoutAttr(convertOp); + } + } +} + +LogicalResult LayoutPropagation::visitOperation(func::FuncOp op) { + // Set a default value for each argument + int argIndex = 0; + for (Value arg : op.getArguments()) { + FailureOr layout = defaultLayoutForType(arg.getType()); + if (failed(layout)) { + return failure(); + } + debugAssignLayout(arg, layout.value()); + assignedLayouts.insert({arg, layout.value()}); + + // FuncOp requires arg attributes are defined as dialect attributes, + // so we can't use an AffineMapAttr here. + op.setArgAttr(argIndex, tensor_ext::TensorExtDialect::kLayoutAttrName, + LayoutAttr::get(&getContext(), layout.value())); + ++argIndex; + } + + // Func result attrs are handled by the ReturnOp + return success(); +} + +LogicalResult LayoutPropagation::visitOperation(func::ReturnOp op) { + func::FuncOp func = op->getParentOfType(); + for (OpOperand &operand : op->getOpOperands()) { + if (!assignedLayouts.contains(operand.get())) { + if (isSecret(operand.get(), solver)) { + return op->emitError("secret return value has no assigned layout"); + } + + // It needs no layout. + continue; + } + AffineMap layout = assignedLayouts.at(operand.get()); + func.setResultAttr(operand.getOperandNumber(), + tensor_ext::TensorExtDialect::kLayoutAttrName, + LayoutAttr::get(&getContext(), layout)); + } + return success(); +} + +LogicalResult LayoutPropagation::visitOperation(GenericOp op) { + // Every block argument has the same layout as its corresponding operand. + for (OpOperand &operand : op->getOpOperands()) { + if (!assignedLayouts.contains(operand.get())) { + // Assume it is not a tensor type and doesn't need a layout. 
+ continue; + } + AffineMap layout = assignedLayouts.at(operand.get()); + BlockArgument blockArg = + op.getRegion().getArgument(operand.getOperandNumber()); + assignedLayouts.insert({blockArg, layout}); + op.setArgAttr(operand.getOperandNumber(), "layout", + AffineMapAttr::get(layout)); + debugAssignLayout(blockArg, layout); + } + + // The layout of the result of the generic op is handled when the YieldOp is + // visited. + return success(); +} + +LogicalResult LayoutPropagation::visitOperation(YieldOp op) { + // The results of the generic op has the same layouts as the yielded values + GenericOp generic = op->getParentOfType(); + for (OpOperand &operand : op->getOpOperands()) { + Type operandType = operand.get().getType(); + if (!assignedLayouts.contains(operand.get())) { + // If it's a tensor type, it may be something like a tensor.empty() + // that would not be assigned a layout earlier in the walk, because + // it does not depend on any secret information. In this case, use the + // default layout. + LLVM_DEBUG(llvm::dbgs() << "No layout assigned to operand " + << operand.get() << ", using default layout\n"); + if (isa(operandType)) { + FailureOr layout = defaultLayoutForType(operandType); + if (failed(layout)) { + return failure(); + } + debugAssignLayout(operand.get(), layout.value()); + assignedLayouts.insert({operand.get(), layout.value()}); + } else { + // Assume it is not a tensor type and doesn't need a layout. 
+ continue; + } + } + AffineMap layout = assignedLayouts.at(operand.get()); + Value result = generic.getResult(operand.getOperandNumber()); + assignedLayouts.insert({result, layout}); + debugAssignLayout(result, layout); + } + setResultLayoutAttr(generic); + return success(); +} + +void LayoutPropagation::passLayoutThroughOp(Operation *op) { + // All inputs have the same layout, so just propagate it to all results + for (Value result : op->getResults()) { + if (isa(result.getType())) { + AffineMap layout = assignedLayouts.at(op->getOperand(0)); + assignedLayouts.insert({result, layout}); + debugAssignLayout(result, layout); + } + } + setResultLayoutAttr(op); +} + +LogicalResult LayoutPropagation::visitOperation(ConstantOp op) { + // Constant ops can take any layout, but to start they are implemented to have + // row-major layouts. But if a later pass back-propagates a layout from a + // later op, an EmptyOp can trivially take on that changed layout. + Value result = op.getResult(); + FailureOr layout = defaultLayoutForType(result.getType()); + if (failed(layout)) { + return failure(); + } + debugAssignLayout(result, layout.value()); + assignedLayouts.insert({result, layout.value()}); + return success(); +} + +LogicalResult LayoutPropagation::visitOperation(EmptyOp op) { + // Empty ops can take any layout, but to start they are implemented to have + // row-major layouts. But if a later pass back-propagates a layout from a + // later op, an EmptyOp can trivially take on that changed layout. + Value result = op.getResult(); + FailureOr layout = defaultLayoutForType(result.getType()); + if (failed(layout)) { + return failure(); + } + debugAssignLayout(result, layout.value()); + assignedLayouts.insert({result, layout.value()}); + return success(); +} + +LogicalResult LayoutPropagation::visitOperation(CollapseShapeOp op) { + // Only support rank-reduced types for now, i.e., where the collapsed + // shape only removes static dimensions of size 1. 
+ SliceVerificationResult res = + isRankReducedType(op.getSrcType(), op.getResultType()); + if (res != SliceVerificationResult::Success) + return op->emitError( + "Only rank-reduced types are supported for CollapseShapeOp"); + + auto tensor = op.getSrc(); + AffineMap inputLayout = assignedLayouts.at(tensor); + unsigned numDims = tensor.getType().getRank(); + llvm::SmallBitVector dimsBV(numDims, false); + + for (Attribute associationGroup : op.getReassociation()) { + auto associationArray = dyn_cast(associationGroup).getValue(); + // a single-entry association group is a no-op + if (associationArray.size() == 1) { + continue; + } + for (Attribute association : associationArray) { + int64_t reassocDim = cast(association).getInt(); + if (op.getSrcType().getShape()[reassocDim] == 1) dimsBV.set(reassocDim); + } + } + + AffineMap resultLayout = + projectDims(inputLayout, dimsBV, /*compressDims=*/true); + assignedLayouts.insert({op.getResult(), resultLayout}); + setResultLayoutAttr(op); + debugAssignLayout(op.getResult(), resultLayout); + return success(); +} + +LogicalResult LayoutPropagation::visitOperation(ExpandShapeOp op) { + MLIRContext *context = &getContext(); + // Only support rank-reduced types for now, i.e., where the expanded shape + // only adds static dimensions of size 1. + SliceVerificationResult res = + isRankReducedType(op.getResultType(), op.getSrcType()); + if (res != SliceVerificationResult::Success) + return op->emitError( + "Only rank-reduced types are supported for ExpandShapeOp"); + + auto tensor = op.getSrc(); + AffineMap inputLayout = assignedLayouts.at(tensor); + + // tensor indices correspond to layout dimensions, and adding a dimension of + // size 1 has no effect on the affine map expressions, so all we're doing is + // adding new dimensions for each reassociation group index corresponding to + // an output dimension of size 1. 
Mainly we have to ensure that the dimension + // we're adding is in the correct index of the affine map's dimension list. + int oldDim = 0; + DenseMap oldDimsToNewDims; + for (Attribute associationGroup : op.getReassociation()) { + auto associationArray = dyn_cast(associationGroup).getValue(); + // a single-entry association group is a no-op + if (associationArray.size() == 1) { + oldDimsToNewDims[getAffineDimExpr(oldDim, context)] = getAffineDimExpr( + cast(associationArray[0]).getInt(), context); + ++oldDim; + continue; + } + + for (Attribute association : associationArray) { + int64_t reassocDim = cast(association).getInt(); + if (op.getResultType().getShape()[reassocDim] > 1) { + oldDimsToNewDims[getAffineDimExpr(oldDim, context)] = + getAffineDimExpr(reassocDim, context); + ++oldDim; + } + } + } + + int resultNumDims = op.getResultType().getRank(); + // First create a larger-rank affine map, but using old dimension identifiers + AffineMap resLayout1 = + AffineMap::get(resultNumDims, /*symbolCount=*/0, inputLayout.getResults(), + &getContext()); + + // Then replace the old dimension identifier expressions with new ones + AffineMap resultLayout = resLayout1.replace(oldDimsToNewDims); + + assignedLayouts.insert({op.getResult(), resultLayout}); + setResultLayoutAttr(op); + debugAssignLayout(op.getResult(), resultLayout); + return success(); +} + +LogicalResult LayoutPropagation::visitOperation(VecmatOp op) { + auto vecmatOp = cast(*op); + auto vec = vecmatOp.lhs(); + + // The matrix has no assigned layout because it is assumed to be + // plaintext/static (this is intended to be enforced by + // hasCompatibleArgumentLayouts). 
+ AffineMap vecLayout = assignedLayouts.at(vec); + + // Always one result, and it's a vector with the same layout + // as the input vector + auto result = vecmatOp->getResult(0); + AffineMap resultLayout = vecLayout; + + assignedLayouts.insert({result, resultLayout}); + setResultLayoutAttr(op); + debugAssignLayout(result, resultLayout); + return success(); +} + +LogicalResult LayoutPropagation::visitOperation(AddIOp op) { + passLayoutThroughOp(op); + return success(); +} + +LogicalResult LayoutPropagation::visitOperation(MulIOp op) { + passLayoutThroughOp(op); + return success(); +} + +LogicalResult LayoutPropagation::visitOperation(ReduceOp op) { + for (const auto &[tensor, result] : + llvm::zip(op.getInputs(), op.getResults())) { + AffineMap resultLayout = + convertLayoutForReduce(assignedLayouts.at(tensor), op.getDimensions()); + assignedLayouts.insert({result, resultLayout}); + debugAssignLayout(result, resultLayout); + } + setResultLayoutAttr(op); + return success(); +} + +FailureOr LayoutPropagation::defaultLayoutForType(Type type) { + Type ty = type; + if (SecretType secretType = dyn_cast(type)) { + ty = secretType.getValueType(); + } + + // RankedTensorType is laid out by default in row-major order + if (RankedTensorType tensorType = dyn_cast(ty)) { + unsigned rank = tensorType.getRank(); + ArrayRef shape = tensorType.getShape(); + SmallVector dims; + for (unsigned i = 0; i < rank; ++i) { + dims.push_back(getAffineDimExpr(i, type.getContext())); + } + + // For a tensor of type tensor, the row-major layout + // would be represented by the AffineMap: + // + // (d0, d1) -> (d0 * n2 + d1) + // + // For a 3-dimension tensor of shape (n1, n2, n3), it would be + // + // (d0, d1, d2) -> (d0 * n2 * n3 + d1 * n3 + d2) + // + // And so on. 
+ AffineExpr expr = dims[0]; + for (unsigned i = 1; i < rank; ++i) { + expr = expr * shape[i] + dims[i]; + } + + return AffineMap::get(rank, /*symbolCount=*/0, expr); + } + + return failure(); +} + +void LayoutPropagation::setResultLayoutAttr(Operation *op) { + OpBuilder builder(&getContext()); + SmallVector resultLayouts = llvm::map_to_vector( + op->getResults(), + [&](Value result) { return assignedLayouts.at(result); }); + op->setAttr("layout", builder.getAffineMapArrayAttr(resultLayouts)); +} + +void LayoutPropagation::runOnOperation() { + DataFlowSolver solver; + solver.load(); + solver.load(); + solver.load(); + if (failed(solver.initializeAndRun(getOperation()))) { + getOperation()->emitOpError() << "Failed to run secretness analysis.\n"; + signalPassFailure(); + return; + } + this->solver = &solver; + + LLVM_DEBUG(llvm::dbgs() << "Running layout propagation on operation: " + << getOperation()->getName() << "\n"); + WalkResult result = + getOperation()->walk([&](Operation *op) { + LogicalResult result = visitOperation(op); + if (failed(result)) { + return WalkResult::interrupt(); + } + return WalkResult::advance(); + }); + + if (result.wasInterrupted()) { + signalPassFailure(); + } +}; + +} // namespace heir +} // namespace mlir diff --git a/lib/Transforms/LayoutPropagation/LayoutPropagation.h b/lib/Transforms/LayoutPropagation/LayoutPropagation.h new file mode 100644 index 000000000..ee6a494c1 --- /dev/null +++ b/lib/Transforms/LayoutPropagation/LayoutPropagation.h @@ -0,0 +1,23 @@ +#ifndef LIB_TRANSFORMS_LAYOUTPROPAGATION_LAYOUTPROPAGATION_H_ +#define LIB_TRANSFORMS_LAYOUTPROPAGATION_LAYOUTPROPAGATION_H_ + +#include "lib/Dialect/Secret/IR/SecretDialect.h" +#include "lib/Dialect/TensorExt/IR/TensorExtDialect.h" +#include "mlir/include/mlir/Dialect/Arith/IR/Arith.h" // from @llvm-project +#include "mlir/include/mlir/Dialect/Func/IR/FuncOps.h" // from @llvm-project +#include "mlir/include/mlir/Dialect/Tensor/IR/Tensor.h" // from @llvm-project +#include 
"mlir/include/mlir/Pass/Pass.h" // from @llvm-project + +namespace mlir { +namespace heir { + +#define GEN_PASS_DECL +#include "lib/Transforms/LayoutPropagation/LayoutPropagation.h.inc" + +#define GEN_PASS_REGISTRATION +#include "lib/Transforms/LayoutPropagation/LayoutPropagation.h.inc" + +} // namespace heir +} // namespace mlir + +#endif // LIB_TRANSFORMS_LAYOUTPROPAGATION_LAYOUTPROPAGATION_H_ diff --git a/lib/Transforms/LayoutPropagation/LayoutPropagation.td b/lib/Transforms/LayoutPropagation/LayoutPropagation.td new file mode 100644 index 000000000..2d14b2fe3 --- /dev/null +++ b/lib/Transforms/LayoutPropagation/LayoutPropagation.td @@ -0,0 +1,100 @@ +#ifndef LIB_TRANSFORMS_LAYOUTPROPAGATION_LAYOUTPROPAGATION_TD_ +#define LIB_TRANSFORMS_LAYOUTPROPAGATION_LAYOUTPROPAGATION_TD_ + +include "mlir/Pass/PassBase.td" + +def LayoutPropagation : Pass<"layout-propagation"> { + let summary = "Propagate ciphertext layouts through the IR"; + let description = [{ + This pass performs a forward propagation of layout (packing) information + through the input IR, starting from the assumption that each function + argument tensor has a row-major layout. + + The chosen layouts (`affine_map`s) are annotated on ops throughout the IR. In particular, + + - Ops with a nested region and block arguments use a dictionary attribute to + mark the layout of each block argument. `func.func` in particular uses the + `tensor_ext.layout` dialect attribute, while others use an affine map + attribute. + - Other ops annotate their results with layouts as an ArrayAttr of affine + maps. The order of the affine maps corresponds to the order of results. + + When two incompatible layouts are encountered during the propagation, a + `tensor_ext.convert_layout` op is inserted. For an example of two + incompatible layouts, consider the `tensor_ext.sum` operation. 
Summing along + each of the two axes of a row-major-packed `tensor<32x32xi16>` results in two + `tensor<32xi16>`, but with incompatible layouts: the first has a compact + layout residing in the first 32-entries of a ciphertext, while the second is + a strided layout with a stride of 32. + + The converted op is arbitrarily chosen to have the layout of the first input, + and later passes are responsible for optimizing the choice of which operand + is converted and where the conversion operations are placed. This separation + of duties allows this pass to be reused as a pure dataflow analysis, in which + case it annotates an un-annotated IR with layout attributes. + + Examples: + + Two incompatible summations require a layout conversion + + ```mlir + func.func @incompatible_summed_dims( + %arg0: !secret.secret>, + %arg1: !secret.secret> + ) -> !secret.secret> { + %0 = secret.generic ins( + %arg0, %arg1 : !secret.secret>, !secret.secret>) { + ^bb0(%input0: tensor<32x32xi16>, %input1: tensor<32x32xi16>): + %1 = tensor_ext.sum %input0, 0 : tensor<32x32xi16> -> tensor<32xi16> + %2 = tensor_ext.sum %input1, 1 : tensor<32x32xi16> -> tensor<32xi16> + %3 = arith.addi %1, %2 : tensor<32xi16> + secret.yield %3 : tensor<32xi16> + } -> !secret.secret> + return %0 : !secret.secret> + } + ``` + + This pass produces: + + ```mlir + #map = affine_map<(d0, d1) -> (d0 * 32 + d1)> + #map1 = affine_map<(d0) -> (d0)> + #map2 = affine_map<(d0) -> (d0 * 32)> + + func.func @incompatible_summed_dims( + %arg0: !secret.secret> + {tensor_ext.layout = #tensor_ext.layout (d0 * 32 + d1)>}, + %arg1: !secret.secret> + {tensor_ext.layout = #tensor_ext.layout (d0 * 32 + d1)>} + ) -> !secret.secret> { + %0 = secret.generic ins( + %arg0, %arg1 : !secret.secret>, !secret.secret>) + attrs = { + arg0 = {layout = #map}, + arg1 = {layout = #map}, + layout = [#map1] + } { + ^bb0(%input0: tensor<32x32xi16>, %input1: tensor<32x32xi16>): + %1 = tensor_ext.sum %input0, 0 {layout = [#map1]} : tensor<32x32xi16> -> 
tensor<32xi16> + %2 = tensor_ext.sum %input1, 1 {layout = [#map2]} : tensor<32x32xi16> -> tensor<32xi16> + %3 = tensor_ext.convert_layout %2 { + from_layout = #map2, + to_layout = #map1, + layout = [#map1]} : tensor<32xi16> + %4 = arith.addi %1, %3 {layout = [#map1]} : tensor<32xi16> + secret.yield %4 : tensor<32xi16> + } -> !secret.secret> + return %0 : !secret.secret> + } + ``` + }]; + let dependentDialects = [ + "mlir::arith::ArithDialect", + "mlir::func::FuncDialect", + "mlir::heir::secret::SecretDialect", + "mlir::heir::tensor_ext::TensorExtDialect", + "mlir::tensor::TensorDialect", + ]; +} + +#endif // LIB_TRANSFORMS_LAYOUTPROPAGATION_LAYOUTPROPAGATION_TD_ diff --git a/lib/Utils/Utils.h b/lib/Utils/Utils.h index f79d18f7b..f8c0a08d0 100644 --- a/lib/Utils/Utils.h +++ b/lib/Utils/Utils.h @@ -5,6 +5,7 @@ #include #include +#include "mlir/include/mlir/IR/Dialect.h" // from @llvm-project #include "mlir/include/mlir/IR/Operation.h" // from @llvm-project #include "mlir/include/mlir/IR/Types.h" // from @llvm-project #include "mlir/include/mlir/IR/Value.h" // from @llvm-project diff --git a/tests/Dialect/TensorExt/IR/ops.mlir b/tests/Dialect/TensorExt/IR/ops.mlir index 6c200e227..2bc332cd8 100644 --- a/tests/Dialect/TensorExt/IR/ops.mlir +++ b/tests/Dialect/TensorExt/IR/ops.mlir @@ -14,3 +14,8 @@ func.func @test_convert_layout(%0: tensor<16x16xi32>) -> tensor<16x16xi32> { %1 = tensor_ext.convert_layout %0 {from_layout = #row_major, to_layout = #col_major} : tensor<16x16xi32> return %1 : tensor<16x16xi32> } + +func.func @test_assign_layout(%0: tensor<16x16xi32>) -> tensor<16x16xi32> { + %1 = tensor_ext.assign_layout %0 {layout = #row_major} : tensor<16x16xi32> + return %1 : tensor<16x16xi32> +} diff --git a/tests/Transforms/layout_propagation/BUILD b/tests/Transforms/layout_propagation/BUILD new file mode 100644 index 000000000..c571e6fc6 --- /dev/null +++ b/tests/Transforms/layout_propagation/BUILD @@ -0,0 +1,10 @@ +load("//bazel:lit.bzl", "glob_lit_tests") + 
+package(default_applicable_licenses = ["@heir//:license"]) + +glob_lit_tests( + name = "all_tests", + data = ["@heir//tests:test_utilities"], + driver = "@heir//tests:run_lit.sh", + test_file_exts = ["mlir"], +) diff --git a/tests/Transforms/layout_propagation/elementwise_add.mlir b/tests/Transforms/layout_propagation/elementwise_add.mlir new file mode 100644 index 000000000..eb2944863 --- /dev/null +++ b/tests/Transforms/layout_propagation/elementwise_add.mlir @@ -0,0 +1,16 @@ +// RUN: heir-opt --layout-propagation %s | FileCheck %s + +!stensor = !secret.secret> +#row_major = affine_map<(i, j) -> (32*i + j)> + +// Just test that the layout propagation pass runs, even though no layout +// conversion ops are inserted. +// CHECK-LABEL: elementwise_sum +func.func @elementwise_sum(%arg0: !stensor, %arg1: !stensor) -> !stensor { + %0 = secret.generic ins(%arg0, %arg1: !stensor, !stensor) { + ^body(%pt_arg0: tensor<32x32xi16>, %pt_arg1: tensor<32x32xi16>): + %3 = arith.addi %pt_arg0, %pt_arg1: tensor<32x32xi16> + secret.yield %3 : tensor<32x32xi16> + } -> !stensor + return %0 : !stensor +} diff --git a/tests/Transforms/layout_propagation/insert_conversion.mlir b/tests/Transforms/layout_propagation/insert_conversion.mlir new file mode 100644 index 000000000..cb6e9f8df --- /dev/null +++ b/tests/Transforms/layout_propagation/insert_conversion.mlir @@ -0,0 +1,64 @@ +// RUN: heir-opt --layout-propagation %s | FileCheck %s + +!tensor = tensor<32x32xi16> +!tensor2 = tensor<32xi16> +!stensor = !secret.secret +!stensor2 = !secret.secret + +// Test that when an operation changes the tensor layout in an incompatible way,
+ +// CHECK: [[input_map:#[^ ]*]] = affine_map<(d0, d1) -> (d0 * 32 + d1)> +// CHECK: [[row_reduced_map:#[^ ]*]] = affine_map<(d0) -> (d0)> +// CHECK: [[col_reduced_map:#[^ ]*]] = affine_map<(d0) -> (d0 * 32)> + +// CHECK: insert_conversion +// CHECK-SAME: %[[arg0:[^:]+]]: !secret.secret> {tensor_ext.layout = #tensor_ext.layout (d0 * 32 + d1)>} +// CHECK-SAME: %[[arg1:[^:]+]]: !secret.secret> {tensor_ext.layout = #tensor_ext.layout (d0 * 32 + d1)>} +func.func @insert_conversion(%arg0: !stensor, %arg1: !stensor) -> !stensor2 { + // CHECK: [[init0:%.*]] = arith.constant dense<0> + // CHECK: [[init1:%.*]] = arith.constant dense<0> + %out_1 = arith.constant dense<0> : !tensor2 + %out_2 = arith.constant dense<0> : !tensor2 + + // CHECK: secret.generic + // CHECK-SAME: ins(%[[arg0]], %[[arg1]] + // CHECK-SAME: [[arg0]] = {layout = [[input_map]]} + // CHECK-SAME: [[arg1]] = {layout = [[input_map]]} + // Note this one denotes the layout of the result of the generic op + // CHECK-SAME: layout = [ + // CHECK-SAME: [[row_reduced_map]]] + %0 = secret.generic ins(%arg0, %arg1: !stensor, !stensor) { + ^body(%pt_arg0: !tensor, %pt_arg1: !tensor): + // CHECK: tensor_ext.assign_layout [[init0]] {layout = [[row_reduced_map]]} + + // result of sum has row-major layout, i.e., with implicit repetition at the end + // (1, 2, ..., 32, 1, 2, ..., 32, ...) + // CHECK: [[unconverted:[^ ]+]] = linalg.reduce + // CHECK-SAME: {layout = [[[row_reduced_map]]]} + %1 = linalg.reduce { arith.addi } ins(%pt_arg0:!tensor) outs(%out_1:!tensor2) dimensions = [0] + + // CHECK: tensor_ext.assign_layout [[init1]] + // CHECK-SAME: layout = [[row_reduced_map]] + // CHECK: tensor_ext.convert_layout + // CHECK-SAME: from_layout = [[row_reduced_map]] + // CHECK-SAME: to_layout = [[col_reduced_map]] + + // result of sum has column-major layout, i.e., strided + // (1, x, ..., x, 2, x, ..., x, 3, x, ..., x, ...)
+ // At this stage, layout inference would annotate this with #strided attr + // CHECK: [[to_convert:%.+]] = linalg.reduce + // CHECK-SAME: {layout = [[[col_reduced_map]]]} + %2 = linalg.reduce { arith.addi } ins(%pt_arg1:!tensor) outs(%out_2:!tensor2) dimensions = [1] + + // CHECK: [[converted:%.+]] = tensor_ext.convert_layout [[to_convert]] + // CHECK-SAME: from_layout = [[col_reduced_map]] + // CHECK-SAME: layout = [ + // CHECK-SAME: [[row_reduced_map]]] + // CHECK-SAME: to_layout = [[row_reduced_map]] + // CHECK: arith.addi [[unconverted]], [[converted]] + %3 = arith.addi %1, %2 : !tensor2 + secret.yield %3 : !tensor2 + } -> !stensor2 + return %0 : !stensor2 +} diff --git a/tools/BUILD b/tools/BUILD index e5bc74863..5225503ce 100644 --- a/tools/BUILD +++ b/tools/BUILD @@ -101,6 +101,7 @@ cc_binary( "@heir//lib/Transforms/ForwardInsertToExtract", "@heir//lib/Transforms/ForwardStoreToLoad", "@heir//lib/Transforms/FullLoopUnroll", + "@heir//lib/Transforms/LayoutPropagation", "@heir//lib/Transforms/LinalgCanonicalizations", "@heir//lib/Transforms/MemrefToArith:ExpandCopy", "@heir//lib/Transforms/MemrefToArith:MemrefToArithRegistration", diff --git a/tools/heir-opt.cpp b/tools/heir-opt.cpp index 50c55c0ea..49831eddc 100644 --- a/tools/heir-opt.cpp +++ b/tools/heir-opt.cpp @@ -63,6 +63,7 @@ #include "lib/Transforms/ForwardInsertToExtract/ForwardInsertToExtract.h" #include "lib/Transforms/ForwardStoreToLoad/ForwardStoreToLoad.h" #include "lib/Transforms/FullLoopUnroll/FullLoopUnroll.h" +#include "lib/Transforms/LayoutPropagation/LayoutPropagation.h" #include "lib/Transforms/LinalgCanonicalizations/LinalgCanonicalizations.h" #include "lib/Transforms/OperationBalancer/OperationBalancer.h" #include "lib/Transforms/OptimizeRelinearization/OptimizeRelinearization.h" @@ -276,6 +277,7 @@ int main(int argc, char **argv) { registerStraightLineVectorizerPasses(); registerUnusedMemRefPasses(); registerOptimizeRelinearizationPasses(); + registerLayoutPropagationPasses(); 
registerLinalgCanonicalizationsPasses(); registerTensorToScalarsPasses(); // Register yosys optimizer pipeline if configured.