diff --git a/lib/Dialect/TensorExt/IR/TensorExtAttributes.td b/lib/Dialect/TensorExt/IR/TensorExtAttributes.td index 9bac27eac..879c7b37d 100644 --- a/lib/Dialect/TensorExt/IR/TensorExtAttributes.td +++ b/lib/Dialect/TensorExt/IR/TensorExtAttributes.td @@ -64,4 +64,20 @@ def SIMDPacking_Attr : TensorExt_Attr<"SIMDPacking", "simd_packing", let assemblyFormat = "`<` struct(params) `>`"; } +def TensorExt_LayoutAttr : TensorExt_Attr<"Layout", "layout"> { + let summary = "Attribute denoting the layout of a tensor in a set of ciphertexts"; + let description = [{ + This attribute contains an affine map that describes the layout of a tensor + in a set of ciphertexts. The affine map is a function that maps tensor indices + to ciphertext indices (possibly with a ciphertext-selecting index). + + This attribute exists primarily to provide a "dialect attribute" which is + required to annotate the arguments of `func.func` arguments. + }]; + + let parameters = (ins "AffineMap": $layout); + let assemblyFormat = "`<` struct(params) `>`"; +} + + #endif // LIB_DIALECT_TENSOREXT_IR_TENSOREXTATTRIBUTES_TD_ diff --git a/lib/Dialect/TensorExt/IR/TensorExtDialect.td b/lib/Dialect/TensorExt/IR/TensorExtDialect.td index 12c44afec..0815f2785 100644 --- a/lib/Dialect/TensorExt/IR/TensorExtDialect.td +++ b/lib/Dialect/TensorExt/IR/TensorExtDialect.td @@ -17,6 +17,12 @@ def TensorExt_Dialect : Dialect { "tensor::TensorDialect", ]; + let extraClassDeclaration = [{ + constexpr const static ::llvm::StringLiteral + kLayoutAttrName = "tensor_ext.layout"; + }]; + + let useDefaultAttributePrinterParser = 1; } diff --git a/lib/Dialect/TensorExt/IR/TensorExtOps.cpp b/lib/Dialect/TensorExt/IR/TensorExtOps.cpp index a3ea13e15..fb5dd5cf8 100644 --- a/lib/Dialect/TensorExt/IR/TensorExtOps.cpp +++ b/lib/Dialect/TensorExt/IR/TensorExtOps.cpp @@ -37,28 +37,44 @@ LogicalResult RotateOp::verify() { return success(); } -LogicalResult ConvertLayoutOp::verify() { - int64_t rank = 
cast(getTensor().getType()).getRank(); - const AffineMap &fromLayout = getFromLayout().getValue(); - const AffineMap &toLayout = getToLayout().getValue(); - - if (rank != fromLayout.getNumDims() || rank != toLayout.getNumDims()) { - std::string fromLayoutStr, toLayoutStr; - llvm::raw_string_ostream fromLayoutStream(fromLayoutStr), - toLayoutStream(toLayoutStr); - fromLayout.print(fromLayoutStream); - toLayout.print(toLayoutStream); +LogicalResult verifyLayoutMatchesType(const AffineMap &layout, Type type, + Operation *op) { + int64_t rank = cast(type).getRank(); + if (rank != layout.getNumDims()) { + std::string layoutStr; + llvm::raw_string_ostream os(layoutStr); + layout.print(os); - return emitOpError() + return op->emitOpError() << "requires tensor rank to match the layout map's dimension count" - "but found rank " - << rank << " and maps " << fromLayoutStream.str() << " and " - << toLayoutStream.str(); + " but found rank " + << rank << " and map " << os.str(); + } + + return success(); +} + +LogicalResult ConvertLayoutOp::verify() { + LogicalResult inputVerification = verifyLayoutMatchesType( + getFromLayout().getValue(), getTensor().getType(), *this); + if (failed(inputVerification)) { + return inputVerification; + } + + LogicalResult outputVerification = verifyLayoutMatchesType( + getToLayout().getValue(), getResult().getType(), *this); + if (failed(outputVerification)) { + return outputVerification; } return success(); } +LogicalResult AssignLayoutOp::verify() { + return verifyLayoutMatchesType(getLayout().getValue(), getTensor().getType(), + *this); +} + } // namespace tensor_ext } // namespace heir } // namespace mlir diff --git a/lib/Dialect/TensorExt/IR/TensorExtOps.td b/lib/Dialect/TensorExt/IR/TensorExtOps.td index 80d4a8f1d..f42033518 100644 --- a/lib/Dialect/TensorExt/IR/TensorExtOps.td +++ b/lib/Dialect/TensorExt/IR/TensorExtOps.td @@ -58,10 +58,31 @@ def TensorExt_ConvertLayoutOp : TensorExt_Op<"convert_layout", [Pure, AllTypesMa This op is 
inserted by layout selection passes. }]; + let assemblyFormat = "operands attr-dict `:` type($output)"; let arguments = (ins AnyRankedTensor:$tensor, Builtin_AffineMapAttr:$from_layout, Builtin_AffineMapAttr:$to_layout); let results = (outs AnyRankedTensor:$output); let hasVerifier = 1; } +def TensorExt_AssignLayoutOp : TensorExt_Op<"assign_layout", [Pure, AllTypesMatch<["tensor", "output"]>]> { + let summary = "Assign a layout to a plaintext tensor."; + let description = [{ + This op allows the ingestion of a plaintext tensor into the layout system. + For example, ops like `linalg.reduce`, require a tensor input to represent + initial values. These will generally be created by an `arith.constant` or + `tensor.empty` op, which does not have secret results. Lowerings will + convert this to a packed plaintext, so that the subsequent ops can be + lowered as ciphertext-plaintext ops. + + This op is inserted by layout selection passes. + }]; + + let assemblyFormat = "operands attr-dict `:` type($output)"; + let arguments = (ins AnyRankedTensor:$tensor, Builtin_AffineMapAttr:$layout); + let results = (outs AnyRankedTensor:$output); + let hasVerifier = 1; +} + + #endif // LIB_DIALECT_TENSOREXT_IR_TENSOREXTOPS_TD_ diff --git a/lib/Pipelines/ArithmeticPipelineRegistration.cpp b/lib/Pipelines/ArithmeticPipelineRegistration.cpp index a109ecc64..92b944968 100644 --- a/lib/Pipelines/ArithmeticPipelineRegistration.cpp +++ b/lib/Pipelines/ArithmeticPipelineRegistration.cpp @@ -20,7 +20,10 @@ #include "lib/Dialect/TensorExt/Transforms/RotateAndReduce.h" #include "lib/Pipelines/PipelineRegistration.h" #include "lib/Transforms/ApplyFolders/ApplyFolders.h" +#include "lib/Transforms/DropUnitDims/DropUnitDims.h" +#include "lib/Transforms/ForwardStoreToLoad/ForwardStoreToLoad.h" #include "lib/Transforms/FullLoopUnroll/FullLoopUnroll.h" +#include "lib/Transforms/LayoutPropagation/LayoutPropagation.h" #include "lib/Transforms/LinalgCanonicalizations/LinalgCanonicalizations.h" #include 
"lib/Transforms/OperationBalancer/OperationBalancer.h" #include "lib/Transforms/OptimizeRelinearization/OptimizeRelinearization.h" @@ -82,8 +85,15 @@ void mlirToSecretArithmeticPipelineBuilder(OpPassManager &pm) { pm.addPass(createCanonicalizerPass()); pm.addPass(createCSEPass()); - // Apply linalg kernels + // Linalg canonicalization + // TODO(#1191): enable dropping unit dims to convert matmul to matvec/vecmat + // pm.addPass(createDropUnitDims()); pm.addPass(createLinalgCanonicalizations()); + + // Layout assignment and lowering + // TODO(#1191): enable layout propagation after implementing the rest + // of the layout lowering pipeline. + // pm.addPass(createLayoutPropagation()); pm.addPass(heir::linalg::createLinalgToTensorExt()); // Vectorize and optimize rotations diff --git a/lib/Pipelines/BUILD b/lib/Pipelines/BUILD index 28c1f9eaa..9a59ab218 100644 --- a/lib/Pipelines/BUILD +++ b/lib/Pipelines/BUILD @@ -104,8 +104,10 @@ cc_library( "@heir//lib/Dialect/TensorExt/Transforms:InsertRotate", "@heir//lib/Dialect/TensorExt/Transforms:RotateAndReduce", "@heir//lib/Transforms/ApplyFolders", + "@heir//lib/Transforms/DropUnitDims", "@heir//lib/Transforms/ForwardStoreToLoad", "@heir//lib/Transforms/FullLoopUnroll", + "@heir//lib/Transforms/LayoutPropagation", "@heir//lib/Transforms/LinalgCanonicalizations", "@heir//lib/Transforms/MemrefToArith:ExpandCopy", "@heir//lib/Transforms/MemrefToArith:MemrefToArithRegistration", diff --git a/lib/Transforms/LayoutPropagation/BUILD b/lib/Transforms/LayoutPropagation/BUILD new file mode 100644 index 000000000..da126a384 --- /dev/null +++ b/lib/Transforms/LayoutPropagation/BUILD @@ -0,0 +1,32 @@ +load("@heir//lib/Transforms:transforms.bzl", "add_heir_transforms") + +package( + default_applicable_licenses = ["@heir//:license"], + default_visibility = ["//visibility:public"], +) + +cc_library( + name = "LayoutPropagation", + srcs = ["LayoutPropagation.cpp"], + hdrs = ["LayoutPropagation.h"], + deps = [ + ":pass_inc_gen", + 
"@heir//lib/Analysis/SecretnessAnalysis", + "@heir//lib/Dialect/Secret/IR:Dialect", + "@heir//lib/Dialect/TensorExt/IR:Dialect", + "@llvm-project//mlir:ArithDialect", + "@llvm-project//mlir:FuncDialect", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:LinalgDialect", + "@llvm-project//mlir:Pass", + "@llvm-project//mlir:Support", + "@llvm-project//mlir:TensorDialect", + "@llvm-project//mlir:Transforms", + ], +) + +add_heir_transforms( + generated_target_name = "pass_inc_gen", + pass_name = "LayoutPropagation", + td_file = "LayoutPropagation.td", +) diff --git a/lib/Transforms/LayoutPropagation/LayoutPropagation.cpp b/lib/Transforms/LayoutPropagation/LayoutPropagation.cpp new file mode 100644 index 000000000..b194576e5 --- /dev/null +++ b/lib/Transforms/LayoutPropagation/LayoutPropagation.cpp @@ -0,0 +1,684 @@ +#include "lib/Transforms/LayoutPropagation/LayoutPropagation.h" + +#include "lib/Analysis/SecretnessAnalysis/SecretnessAnalysis.h" +#include "lib/Dialect/Secret/IR/SecretOps.h" +#include "lib/Dialect/Secret/IR/SecretTypes.h" +#include "lib/Dialect/TensorExt/IR/TensorExtAttributes.h" +#include "lib/Dialect/TensorExt/IR/TensorExtOps.h" +#include "llvm/include/llvm/ADT/TypeSwitch.h" // from @llvm-project +#include "llvm/include/llvm/Support/Debug.h" // from @llvm-project +#include "mlir/include/mlir/Analysis/DataFlow/ConstantPropagationAnalysis.h" // from @llvm-project +#include "mlir/include/mlir/Analysis/DataFlow/DeadCodeAnalysis.h" // from @llvm-project +#include "mlir/include/mlir/Analysis/DataFlowFramework.h" // from @llvm-project +#include "mlir/include/mlir/Dialect/Linalg/IR/Linalg.h" // from @llvm-project +#include "mlir/include/mlir/Dialect/Linalg/IR/LinalgInterfaces.h" // from @llvm-project +#include "mlir/include/mlir/IR/AffineMap.h" // from @llvm-project + +#define DEBUG_TYPE "layout-propagation" + +namespace mlir { +namespace heir { + +using linalg::ReduceOp; +using linalg::VecmatOp; +using ::mlir::arith::AddIOp; +using ::mlir::arith::ConstantOp; 
+using ::mlir::arith::MulIOp; +using secret::GenericOp; +using secret::SecretType; +using secret::YieldOp; +using tensor::CollapseShapeOp; +using tensor::EmptyOp; +using tensor::ExpandShapeOp; +using tensor_ext::AssignLayoutOp; +using tensor_ext::ConvertLayoutOp; +using tensor_ext::LayoutAttr; + +#define GEN_PASS_DEF_LAYOUTPROPAGATION +#include "lib/Transforms/LayoutPropagation/LayoutPropagation.h.inc" + +// The result of a compatibility check for the layouts of an op's operands (cf. +// hasCompatibleArgumentLayouts). If the check fails, the presence of a +// diagnostic signals that the failure is unrecoverable and should cause the +// pass to fail. If the diagnostic is nullopt, then the failure can be +// recovered by rectifyIncompatibleOperandLayouts. +struct CompatibilityResult { + bool compatible; + std::optional diag; +}; + +struct LayoutPropagation : impl::LayoutPropagationBase { + using LayoutPropagationBase::LayoutPropagationBase; + + // Top level visit method handles common logic for all ops, e.g., inserting + // conversions. + LogicalResult visitOperation(Operation *op); + + // Op-specific transfer functions + LogicalResult visitOperation(AddIOp op); + LogicalResult visitOperation(CollapseShapeOp op); + LogicalResult visitOperation(ConstantOp op); + LogicalResult visitOperation(EmptyOp op); + LogicalResult visitOperation(ExpandShapeOp op); + LogicalResult visitOperation(GenericOp op); + LogicalResult visitOperation(MulIOp op); + LogicalResult visitOperation(ReduceOp op); + LogicalResult visitOperation(VecmatOp op); + LogicalResult visitOperation(YieldOp op); + LogicalResult visitOperation(func::FuncOp op); + LogicalResult visitOperation(func::ReturnOp op); + + // Return true if the operand layouts are compatible for the operation, and + // false if not. Include an InFlightDiagnostic if an operand is encountered + // that requires a layout, but none has been set. 
+ CompatibilityResult hasCompatibleArgumentLayouts(Operation *op); + + // Op-specific compatibility functions + CompatibilityResult hasCompatibleArgumentLayouts(ReduceOp op); + CompatibilityResult hasCompatibleArgumentLayouts(VecmatOp op); + + // Insert conversion ops to rectify incompatible operand layouts + void rectifyIncompatibleOperandLayouts(Operation *op); + + // Op-specific overrides + void rectifyIncompatibleOperandLayouts(ReduceOp op); + + // Return the default layout for a given type + FailureOr defaultLayoutForType(Type type); + + // Helper to pass layouts through generic ops + void passLayoutThroughOp(Operation *op); + + // Add an op attribute denoting the layouts of the op results. Assumes the + // assignedLayouts map contains the layout for the result SSA values already. + void setResultLayoutAttr(Operation *op); + + void runOnOperation() override; + + DenseMap assignedLayouts; + DataFlowSolver *solver; +}; + +void visitDebugInfo(Operation *op) { + LLVM_DEBUG(llvm::dbgs() << "Visiting: " << op->getName() << "\n"); +} + +void debugAssignLayout(Value value, AffineMap layout) { + LLVM_DEBUG(llvm::dbgs() << "Assigning layout " << layout << " to value " + << value << "\n"); +} + +LogicalResult LayoutPropagation::visitOperation(Operation *op) { + visitDebugInfo(op); + + if (!isa(op) && + !isSecret(op->getOperands(), solver) && + !isSecret(op->getResults(), solver)) { + LLVM_DEBUG(llvm::dbgs() + << "Skipping op " << op->getName() + << " because its operands and results are non-secret, or it is " + "in a special allowlist of ops to ignore\n"); + return success(); + } + + // If an operand has no layout, it may for example be produced as a plaintext + // constant, such as a zero-valued tensor for the initializer of a reduction. + // In this case, we insert a layout assignment. 
+ for (auto operand : op->getOperands()) { + if (!assignedLayouts.contains(operand)) { + if (isa(operand.getType())) { + LLVM_DEBUG(llvm::dbgs() << "tensor operand " << operand + << " has no layout assigned\n"); + FailureOr layout = defaultLayoutForType(operand.getType()); + if (failed(layout)) { + return failure(); + } + mlir::IRRewriter builder(&getContext()); + builder.setInsertionPoint(op); + AssignLayoutOp assignLayoutOp = builder.create( + op->getLoc(), operand, AffineMapAttr::get(layout.value())); + Value toReplace = assignLayoutOp.getResult(); + builder.replaceAllUsesExcept(operand, toReplace, assignLayoutOp); + debugAssignLayout(toReplace, layout.value()); + assignedLayouts.insert({toReplace, layout.value()}); + } + } + } + + auto [compatible, diag] = hasCompatibleArgumentLayouts(op); + if (!compatible) { + if (diag.has_value()) { + // An InFlightDiagnostic casts to a failure() + return diag.value(); + } + rectifyIncompatibleOperandLayouts(op); + } + + return TypeSwitch(op) + // func ops + .Case( + [&](auto op) { return visitOperation(op); }) + // arith ops + .Case( + [&](auto op) { return visitOperation(op); }) + // secret ops + .Case([&](auto op) { return visitOperation(op); }) + // linalg ops + .Case([&](auto op) { return visitOperation(op); }) + // tensor ops + .Case( + [&](auto op) { return visitOperation(op); }) + .Default([&](Operation *op) { return success(); }); +} + +CompatibilityResult LayoutPropagation::hasCompatibleArgumentLayouts( + Operation *op) { + return TypeSwitch(op) + // Trivially true ops + .Case( + [&](auto op) { return CompatibilityResult{true, std::nullopt}; }) + // Ops with special rules + .Case( + [&](auto op) { return hasCompatibleArgumentLayouts(op); }) + // By default, assume operands must all have the same layout. 
+ .Default([&](Operation *op) { + std::optional firstFoundLayout; + + for (auto &operand : op->getOpOperands()) { + if (isa(operand.get().getType())) { + if (!assignedLayouts.contains(operand.get())) { + // If the operand has no layout, we can't propagate layout + // information to the result. + return CompatibilityResult{ + false, op->emitError("operand has no assigned layout")}; + } + AffineMap layout = assignedLayouts.at(operand.get()); + + if (!firstFoundLayout.has_value()) firstFoundLayout = layout; + if (layout != firstFoundLayout.value()) { + return CompatibilityResult{false, std::nullopt}; + } + } + } + + return CompatibilityResult{true, std::nullopt}; + }); +} + +// A helper to convert the layout of an input tensor to a reduce op. The result +// layout is equivalent to reducing the summed dimensions to 1 and then +// dropping them. +// +// TODO(1352): Determine if/how to support repetition in the layout. +AffineMap convertLayoutForReduce(AffineMap inputLayout, + ArrayRef dimsToReduce) { + unsigned numDims = inputLayout.getNumDims(); + llvm::SmallBitVector dimsBV(numDims, false); + for (int dimToSum : dimsToReduce) dimsBV.set(dimToSum); + return projectDims(inputLayout, dimsBV, /*compressDims=*/true); +} + +CompatibilityResult LayoutPropagation::hasCompatibleArgumentLayouts( + ReduceOp op) { + // The arguments of a ReduceOp are the tensor(s) to reduce and the + // initializer values for the reduction. 
+ for (const auto &[input, init] : llvm::zip(op.getInputs(), op.getInits())) { + if (!assignedLayouts.contains(input)) { + return {false, op->emitError("input tensor has no assigned layout")}; + } + if (!assignedLayouts.contains(init)) { + return {false, + op->emitError("initializer tensor has no assigned layout")}; + } + + AffineMap inputLayout = assignedLayouts.at(input); + AffineMap initLayout = assignedLayouts.at(init); + AffineMap reducedInputLayout = + convertLayoutForReduce(inputLayout, op.getDimensions()); + + if (reducedInputLayout != initLayout) { + return {false, std::nullopt}; + } + } + + return {true, std::nullopt}; +} + +CompatibilityResult LayoutPropagation::hasCompatibleArgumentLayouts( + VecmatOp op) { + // Currently only support secret vectors and plaintext matrices. + linalg::ContractionOpInterface vecmatOp = + cast(op.getOperation()); + Value vec = vecmatOp.lhs(); + Value mat = vecmatOp.rhs(); + if (isSecret(mat, solver) || !isSecret(vec, solver)) { + return {false, + op->emitError("Only secret vectors and plaintext matrices are " + "supported for linalg.vecmat")}; + } + + if (!assignedLayouts.contains(vec)) { + return {false, op->emitError("vector operand has no assigned layout")}; + } + return {true, std::nullopt}; +} + +void LayoutPropagation::rectifyIncompatibleOperandLayouts(Operation *op) { + LLVM_DEBUG({ + auto diag = op->emitRemark() << "Inserting layout conversion op due to " + "disagreeing operand layouts"; + auto ¬e = diag.attachNote(); + for (auto operand : op->getOperands()) { + std::string mapStr; + llvm::raw_string_ostream os(mapStr); + AffineMap operandLayout; + if (assignedLayouts.contains(operand)) + operandLayout = assignedLayouts.at(operand); + operandLayout.print(os); + note << "\n- Operand: " << operand << "; Layout: " << os.str(); + } + }); + + TypeSwitch(op) + // Ops with special rules + .Case( + [&](auto op) { return rectifyIncompatibleOperandLayouts(op); }) + .Default([&](Operation *op) { + // Default target layout is 
chosen arbitrarily as the first operand's + // layout for now. A different pass is responsible for optimizing the + // placement and mechanics of the layout conversion ops. + mlir::IRRewriter builder(&getContext()); + const auto it = llvm::find_if(op->getOperands(), [this](Value pair) { + return assignedLayouts.contains(pair); + }); + AffineMap targetLayout = assignedLayouts.at(*it); + + for (auto &opOperand : op->getOpOperands()) { + if (!assignedLayouts.contains(opOperand.get())) continue; + AffineMap sourceLayout = assignedLayouts.at(opOperand.get()); + + if (sourceLayout != targetLayout) { + builder.setInsertionPoint(op); + ConvertLayoutOp convertOp = builder.create( + op->getLoc(), opOperand.get(), AffineMapAttr::get(sourceLayout), + AffineMapAttr::get(targetLayout)); + + // Layout of the result is the same as the target layout of the + // conversion. Mostly this is done for consistency: all ops have an + // attribute describing the layout of their results. + OpBuilder builder(&getContext()); + assignedLayouts.insert({convertOp.getResult(), targetLayout}); + setResultLayoutAttr(convertOp); + op->setOperand(opOperand.getOperandNumber(), convertOp.getResult()); + } + } + }); +} + +void LayoutPropagation::rectifyIncompatibleOperandLayouts(ReduceOp op) { + mlir::IRRewriter builder(&getContext()); + builder.setInsertionPoint(op); + + for (const auto &[input, init] : llvm::zip(op.getInputs(), op.getInits())) { + AffineMap inputLayout = assignedLayouts.at(input); + AffineMap initLayout = assignedLayouts.at(init); + AffineMap reducedInputLayout = + convertLayoutForReduce(inputLayout, op.getDimensions()); + + if (reducedInputLayout != initLayout) { + ConvertLayoutOp convertOp = builder.create( + op->getLoc(), init, AffineMapAttr::get(initLayout), + AffineMapAttr::get(reducedInputLayout)); + Value toReplace = convertOp.getResult(); + // I'd like to just call op.setOperand(i, toReplace) here, but I can't + // figure out how the i'th entry in `getInits` corresponds to the 
general + // OpOperand index. + builder.replaceUsesWithIf(init, toReplace, [&](OpOperand &operand) { + return operand.getOwner() == op; + }); + assignedLayouts.insert({toReplace, reducedInputLayout}); + setResultLayoutAttr(convertOp); + } + } +} + +LogicalResult LayoutPropagation::visitOperation(func::FuncOp op) { + // Set a default value for each argument + int argIndex = 0; + for (Value arg : op.getArguments()) { + FailureOr layout = defaultLayoutForType(arg.getType()); + if (failed(layout)) { + return failure(); + } + debugAssignLayout(arg, layout.value()); + assignedLayouts.insert({arg, layout.value()}); + + // FuncOp requires arg attributes are defined as dialect attributes, + // so we can't use an AffineMapAttr here. + op.setArgAttr(argIndex, tensor_ext::TensorExtDialect::kLayoutAttrName, + LayoutAttr::get(&getContext(), layout.value())); + ++argIndex; + } + + // Func result attrs are handled by the ReturnOp + return success(); +} + +LogicalResult LayoutPropagation::visitOperation(func::ReturnOp op) { + func::FuncOp func = op->getParentOfType(); + for (OpOperand &operand : op->getOpOperands()) { + if (!assignedLayouts.contains(operand.get())) { + if (isSecret(operand.get(), solver)) { + return op->emitError("secret return value has no assigned layout"); + } + + // It needs no layout. + continue; + } + AffineMap layout = assignedLayouts.at(operand.get()); + func.setResultAttr(operand.getOperandNumber(), + tensor_ext::TensorExtDialect::kLayoutAttrName, + LayoutAttr::get(&getContext(), layout)); + } + return success(); +} + +LogicalResult LayoutPropagation::visitOperation(GenericOp op) { + // Every block argument has the same layout as its corresponding operand. + for (OpOperand &operand : op->getOpOperands()) { + if (!assignedLayouts.contains(operand.get())) { + // Assume it is not a tensor type and doesn't need a layout. 
+ continue; + } + AffineMap layout = assignedLayouts.at(operand.get()); + BlockArgument blockArg = + op.getRegion().getArgument(operand.getOperandNumber()); + assignedLayouts.insert({blockArg, layout}); + op.setArgAttr(operand.getOperandNumber(), "layout", + AffineMapAttr::get(layout)); + debugAssignLayout(blockArg, layout); + } + + // The layout of the result of the generic op is handled when the YieldOp is + // visited. + return success(); +} + +LogicalResult LayoutPropagation::visitOperation(YieldOp op) { + // The results of the generic op has the same layouts as the yielded values + GenericOp generic = op->getParentOfType(); + for (OpOperand &operand : op->getOpOperands()) { + Type operandType = operand.get().getType(); + if (!assignedLayouts.contains(operand.get())) { + // If it's a tensor type, it may be something like a tensor.empty() + // that would not be assigned a layout earlier in the walk, because + // it does not depend on any secret information. In this case, use the + // default layout. + LLVM_DEBUG(llvm::dbgs() << "No layout assigned to operand " + << operand.get() << ", using default layout\n"); + if (isa(operandType)) { + FailureOr layout = defaultLayoutForType(operandType); + if (failed(layout)) { + return failure(); + } + debugAssignLayout(operand.get(), layout.value()); + assignedLayouts.insert({operand.get(), layout.value()}); + } else { + // Assume it is not a tensor type and doesn't need a layout. 
+ continue; + } + } + AffineMap layout = assignedLayouts.at(operand.get()); + Value result = generic.getResult(operand.getOperandNumber()); + assignedLayouts.insert({result, layout}); + debugAssignLayout(result, layout); + } + setResultLayoutAttr(generic); + return success(); +} + +void LayoutPropagation::passLayoutThroughOp(Operation *op) { + // All inputs have the same layout, so just propagate it to all results + for (Value result : op->getResults()) { + if (isa(result.getType())) { + AffineMap layout = assignedLayouts.at(op->getOperand(0)); + assignedLayouts.insert({result, layout}); + debugAssignLayout(result, layout); + } + } + setResultLayoutAttr(op); +} + +LogicalResult LayoutPropagation::visitOperation(ConstantOp op) { + // Constant ops can take any layout, but to start they are implemented to have + // row-major layouts. But if a later pass back-propagates a layout from a + // later op, an EmptyOp can trivially take on that changed layout. + Value result = op.getResult(); + FailureOr layout = defaultLayoutForType(result.getType()); + if (failed(layout)) { + return failure(); + } + debugAssignLayout(result, layout.value()); + assignedLayouts.insert({result, layout.value()}); + return success(); +} + +LogicalResult LayoutPropagation::visitOperation(EmptyOp op) { + // Empty ops can take any layout, but to start they are implemented to have + // row-major layouts. But if a later pass back-propagates a layout from a + // later op, an EmptyOp can trivially take on that changed layout. + Value result = op.getResult(); + FailureOr layout = defaultLayoutForType(result.getType()); + if (failed(layout)) { + return failure(); + } + debugAssignLayout(result, layout.value()); + assignedLayouts.insert({result, layout.value()}); + return success(); +} + +LogicalResult LayoutPropagation::visitOperation(CollapseShapeOp op) { + // Only support rank-reduced types for now, i.e., where the collapsed + // shape only removes static dimensions of size 1. 
+ SliceVerificationResult res = + isRankReducedType(op.getSrcType(), op.getResultType()); + if (res != SliceVerificationResult::Success) + return op->emitError( + "Only rank-reduced types are supported for CollapseShapeOp"); + + auto tensor = op.getSrc(); + AffineMap inputLayout = assignedLayouts.at(tensor); + unsigned numDims = tensor.getType().getRank(); + llvm::SmallBitVector dimsBV(numDims, false); + + for (Attribute associationGroup : op.getReassociation()) { + auto associationArray = dyn_cast(associationGroup).getValue(); + // a single-entry association group is a no-op + if (associationArray.size() == 1) { + continue; + } + for (Attribute association : associationArray) { + int64_t reassocDim = cast(association).getInt(); + if (op.getSrcType().getShape()[reassocDim] == 1) dimsBV.set(reassocDim); + } + } + + AffineMap resultLayout = + projectDims(inputLayout, dimsBV, /*compressDims=*/true); + assignedLayouts.insert({op.getResult(), resultLayout}); + setResultLayoutAttr(op); + debugAssignLayout(op.getResult(), resultLayout); + return success(); +} + +LogicalResult LayoutPropagation::visitOperation(ExpandShapeOp op) { + MLIRContext *context = &getContext(); + // Only support rank-reduced types for now, i.e., where the expanded shape + // only adds static dimensions of size 1. + SliceVerificationResult res = + isRankReducedType(op.getResultType(), op.getSrcType()); + if (res != SliceVerificationResult::Success) + return op->emitError( + "Only rank-reduced types are supported for ExpandShapeOp"); + + auto tensor = op.getSrc(); + AffineMap inputLayout = assignedLayouts.at(tensor); + + // tensor indices correspond to layout dimensions, and adding a dimension of + // size 1 has no effect on the affine map expressions, so all we're doing is + // adding new dimensions for each reassociation group index corresponding to + // an output dimension of size 1. 
Mainly we have to ensure that the dimension + // we're adding is in the correct index of the affine map's dimension list. + int oldDim = 0; + DenseMap oldDimsToNewDims; + for (Attribute associationGroup : op.getReassociation()) { + auto associationArray = dyn_cast(associationGroup).getValue(); + // a single-entry association group is a no-op + if (associationArray.size() == 1) { + oldDimsToNewDims[getAffineDimExpr(oldDim, context)] = getAffineDimExpr( + cast(associationArray[0]).getInt(), context); + ++oldDim; + continue; + } + + for (Attribute association : associationArray) { + int64_t reassocDim = cast(association).getInt(); + if (op.getResultType().getShape()[reassocDim] > 1) { + oldDimsToNewDims[getAffineDimExpr(oldDim, context)] = + getAffineDimExpr(reassocDim, context); + ++oldDim; + } + } + } + + int resultNumDims = op.getResultType().getRank(); + // First create a larger-rank affine map, but using old dimension identifiers + AffineMap resLayout1 = + AffineMap::get(resultNumDims, /*symbolCount=*/0, inputLayout.getResults(), + &getContext()); + + // Then replace the old dimension identifier expressions with new ones + AffineMap resultLayout = resLayout1.replace(oldDimsToNewDims); + + assignedLayouts.insert({op.getResult(), resultLayout}); + setResultLayoutAttr(op); + debugAssignLayout(op.getResult(), resultLayout); + return success(); +} + +LogicalResult LayoutPropagation::visitOperation(VecmatOp op) { + auto vecmatOp = cast(*op); + auto vec = vecmatOp.lhs(); + + // The matrix has no assigned layout because it is assumed to be + // plaintext/static (this is intended to be enforced by + // hasCompatibleArgumentLayouts). 
+ AffineMap vecLayout = assignedLayouts.at(vec); + + // Always one result, and it's a vector with the same layout + // as the input vector + auto result = vecmatOp->getResult(0); + AffineMap resultLayout = vecLayout; + + assignedLayouts.insert({result, resultLayout}); + setResultLayoutAttr(op); + debugAssignLayout(result, resultLayout); + return success(); +} + +LogicalResult LayoutPropagation::visitOperation(AddIOp op) { + passLayoutThroughOp(op); + return success(); +} + +LogicalResult LayoutPropagation::visitOperation(MulIOp op) { + passLayoutThroughOp(op); + return success(); +} + +LogicalResult LayoutPropagation::visitOperation(ReduceOp op) { + for (const auto &[tensor, result] : + llvm::zip(op.getInputs(), op.getResults())) { + AffineMap resultLayout = + convertLayoutForReduce(assignedLayouts.at(tensor), op.getDimensions()); + assignedLayouts.insert({result, resultLayout}); + debugAssignLayout(result, resultLayout); + } + setResultLayoutAttr(op); + return success(); +} + +FailureOr LayoutPropagation::defaultLayoutForType(Type type) { + Type ty = type; + if (SecretType secretType = dyn_cast(type)) { + ty = secretType.getValueType(); + } + + // RankedTensorType is laid out by default in row-major order + if (RankedTensorType tensorType = dyn_cast(ty)) { + unsigned rank = tensorType.getRank(); + ArrayRef shape = tensorType.getShape(); + SmallVector dims; + for (unsigned i = 0; i < rank; ++i) { + dims.push_back(getAffineDimExpr(i, type.getContext())); + } + + // For a tensor of type tensor, the row-major layout + // would be represented by the AffineMap: + // + // (d0, d1) -> (d0 * n2 + d1) + // + // For a 3-dimension tensor of shape (n1, n2, n3), it would be + // + // (d0, d1, d2) -> (d0 * n2 * n3 + d1 * n3 + d2) + // + // And so on. 
+ AffineExpr expr = dims[0]; + for (unsigned i = 1; i < rank; ++i) { + expr = expr * shape[i] + dims[i]; + } + + return AffineMap::get(rank, /*symbolCount=*/0, expr); + } + + return failure(); +} + +void LayoutPropagation::setResultLayoutAttr(Operation *op) { + OpBuilder builder(&getContext()); + SmallVector resultLayouts = llvm::map_to_vector( + op->getResults(), + [&](Value result) { return assignedLayouts.at(result); }); + op->setAttr("layout", builder.getAffineMapArrayAttr(resultLayouts)); +} + +void LayoutPropagation::runOnOperation() { + DataFlowSolver solver; + solver.load(); + solver.load(); + solver.load(); + if (failed(solver.initializeAndRun(getOperation()))) { + getOperation()->emitOpError() << "Failed to run secretness analysis.\n"; + signalPassFailure(); + return; + } + this->solver = &solver; + + LLVM_DEBUG(llvm::dbgs() << "Running layout propagation on operation: " + << getOperation()->getName() << "\n"); + WalkResult result = + getOperation()->walk([&](Operation *op) { + LogicalResult result = visitOperation(op); + if (failed(result)) { + return WalkResult::interrupt(); + } + return WalkResult::advance(); + }); + + if (result.wasInterrupted()) { + signalPassFailure(); + } +}; + +} // namespace heir +} // namespace mlir diff --git a/lib/Transforms/LayoutPropagation/LayoutPropagation.h b/lib/Transforms/LayoutPropagation/LayoutPropagation.h new file mode 100644 index 000000000..ee6a494c1 --- /dev/null +++ b/lib/Transforms/LayoutPropagation/LayoutPropagation.h @@ -0,0 +1,23 @@ +#ifndef LIB_TRANSFORMS_LAYOUTPROPAGATION_LAYOUTPROPAGATION_H_ +#define LIB_TRANSFORMS_LAYOUTPROPAGATION_LAYOUTPROPAGATION_H_ + +#include "lib/Dialect/Secret/IR/SecretDialect.h" +#include "lib/Dialect/TensorExt/IR/TensorExtDialect.h" +#include "mlir/include/mlir/Dialect/Arith/IR/Arith.h" // from @llvm-project +#include "mlir/include/mlir/Dialect/Func/IR/FuncOps.h" // from @llvm-project +#include "mlir/include/mlir/Dialect/Tensor/IR/Tensor.h" // from @llvm-project +#include 
"mlir/include/mlir/Pass/Pass.h" // from @llvm-project + +namespace mlir { +namespace heir { + +#define GEN_PASS_DECL +#include "lib/Transforms/LayoutPropagation/LayoutPropagation.h.inc" + +#define GEN_PASS_REGISTRATION +#include "lib/Transforms/LayoutPropagation/LayoutPropagation.h.inc" + +} // namespace heir +} // namespace mlir + +#endif // LIB_TRANSFORMS_LAYOUTPROPAGATION_LAYOUTPROPAGATION_H_ diff --git a/lib/Transforms/LayoutPropagation/LayoutPropagation.td b/lib/Transforms/LayoutPropagation/LayoutPropagation.td new file mode 100644 index 000000000..2d14b2fe3 --- /dev/null +++ b/lib/Transforms/LayoutPropagation/LayoutPropagation.td @@ -0,0 +1,100 @@ +#ifndef LIB_TRANSFORMS_LAYOUTPROPAGATION_LAYOUTPROPAGATION_TD_ +#define LIB_TRANSFORMS_LAYOUTPROPAGATION_LAYOUTPROPAGATION_TD_ + +include "mlir/Pass/PassBase.td" + +def LayoutPropagation : Pass<"layout-propagation"> { + let summary = "Propagate ciphertext layouts through the IR"; + let description = [{ + This pass performs a forward propagation of layout (packing) information + through the input IR, starting from the assumption that each function + argument tensor has a row-major layout. + + The chosen layouts (`affine_map`s) are annotated on ops throughout the IR. In particular, + + - Ops with a nested region and block arguments use a dictionary attribute to + mark the layout of each block argument. `func.func` in particular uses the + `tensor_ext.layout` dialect attribute, while others use an affine map + attribute. + - Other ops annotate their results with layouts as an ArrayAttr of affine + maps. The order of the affine maps corresponds to the order of results. + + When two incompatible layouts are encountered during the propagation, a + `tensor_ext.convert_layout` op is inserted. For an example of two + incompatible layouts, consider the `tensor_ext.sum` operation. 
Summing along + each of the two axes of a row-major-packed `tensor<32x32xi16>` results in two + `tensor<32xi16>`, but with incompatible layouts: the first has a compact + layout residing in the first 32-entries of a ciphertext, while the second is + a strided layout with a stride of 32. + + The converted op is arbitrarily chosen to have the layout of the first input, + and later passes are responsible for optimizing the choice of which operand + is converted and where the conversion operations are placed. This separation + of duties allows this pass to be reused as a pure dataflow analysis, in which + case it annotates an un-annotated IR with layout attributes. + + Examples: + + Two incompatible summations require a layout conversion + + ```mlir + func.func @incompatible_summed_dims( + %arg0: !secret.secret>, + %arg1: !secret.secret> + ) -> !secret.secret> { + %0 = secret.generic ins( + %arg0, %arg1 : !secret.secret>, !secret.secret>) { + ^bb0(%input0: tensor<32x32xi16>, %input1: tensor<32x32xi16>): + %1 = tensor_ext.sum %input0, 0 : tensor<32x32xi16> -> tensor<32xi16> + %2 = tensor_ext.sum %input1, 1 : tensor<32x32xi16> -> tensor<32xi16> + %3 = arith.addi %1, %2 : tensor<32xi16> + secret.yield %3 : tensor<32xi16> + } -> !secret.secret> + return %0 : !secret.secret> + } + ``` + + This pass produces: + + ```mlir + #map = affine_map<(d0, d1) -> (d0 * 32 + d1)> + #map1 = affine_map<(d0) -> (d0)> + #map2 = affine_map<(d0) -> (d0 * 32)> + + func.func @incompatible_summed_dims( + %arg0: !secret.secret> + {tensor_ext.layout = #tensor_ext.layout (d0 * 32 + d1)>}, + %arg1: !secret.secret> + {tensor_ext.layout = #tensor_ext.layout (d0 * 32 + d1)>} + ) -> !secret.secret> { + %0 = secret.generic ins( + %arg0, %arg1 : !secret.secret>, !secret.secret>) + attrs = { + arg0 = {layout = #map}, + arg1 = {layout = #map}, + layout = [#map1] + } { + ^bb0(%input0: tensor<32x32xi16>, %input1: tensor<32x32xi16>): + %1 = tensor_ext.sum %input0, 0 {layout = [#map1]} : tensor<32x32xi16> -> 
tensor<32xi16> + %2 = tensor_ext.sum %input1, 1 {layout = [#map2]} : tensor<32x32xi16> -> tensor<32xi16> + %3 = tensor_ext.convert_layout %2 { + from_layout = #map2, + to_layout = #map1, + layout = [#map1]} : tensor<32xi16> + %4 = arith.addi %1, %3 {layout = [#map1]} : tensor<32xi16> + secret.yield %4 : tensor<32xi16> + } -> !secret.secret> + return %0 : !secret.secret> + } + ``` + }]; + let dependentDialects = [ + "mlir::arith::ArithDialect", + "mlir::func::FuncDialect", + "mlir::heir::secret::SecretDialect", + "mlir::heir::tensor_ext::TensorExtDialect", + "mlir::tensor::TensorDialect", + ]; +} + +#endif // LIB_TRANSFORMS_LAYOUTPROPAGATION_LAYOUTPROPAGATION_TD_ diff --git a/lib/Utils/Utils.h b/lib/Utils/Utils.h index f79d18f7b..f8c0a08d0 100644 --- a/lib/Utils/Utils.h +++ b/lib/Utils/Utils.h @@ -5,6 +5,7 @@ #include #include +#include "mlir/include/mlir/IR/Dialect.h" // from @llvm-project #include "mlir/include/mlir/IR/Operation.h" // from @llvm-project #include "mlir/include/mlir/IR/Types.h" // from @llvm-project #include "mlir/include/mlir/IR/Value.h" // from @llvm-project diff --git a/tests/Dialect/TensorExt/IR/ops.mlir b/tests/Dialect/TensorExt/IR/ops.mlir index 6c200e227..2bc332cd8 100644 --- a/tests/Dialect/TensorExt/IR/ops.mlir +++ b/tests/Dialect/TensorExt/IR/ops.mlir @@ -14,3 +14,8 @@ func.func @test_convert_layout(%0: tensor<16x16xi32>) -> tensor<16x16xi32> { %1 = tensor_ext.convert_layout %0 {from_layout = #row_major, to_layout = #col_major} : tensor<16x16xi32> return %1 : tensor<16x16xi32> } + +func.func @test_assign_layout(%0: tensor<16x16xi32>) -> tensor<16x16xi32> { + %1 = tensor_ext.assign_layout %0 {layout = #row_major} : tensor<16x16xi32> + return %1 : tensor<16x16xi32> +} diff --git a/tests/Transforms/layout_propagation/BUILD b/tests/Transforms/layout_propagation/BUILD new file mode 100644 index 000000000..c571e6fc6 --- /dev/null +++ b/tests/Transforms/layout_propagation/BUILD @@ -0,0 +1,10 @@ +load("//bazel:lit.bzl", "glob_lit_tests") + 
+package(default_applicable_licenses = ["@heir//:license"]) + +glob_lit_tests( + name = "all_tests", + data = ["@heir//tests:test_utilities"], + driver = "@heir//tests:run_lit.sh", + test_file_exts = ["mlir"], +) diff --git a/tests/Transforms/layout_propagation/elementwise_add.mlir b/tests/Transforms/layout_propagation/elementwise_add.mlir new file mode 100644 index 000000000..eb2944863 --- /dev/null +++ b/tests/Transforms/layout_propagation/elementwise_add.mlir @@ -0,0 +1,16 @@ +// RUN: heir-opt --layout-propagation %s | FileCheck %s + +!stensor = !secret.secret> +#row_major = affine_map<(i, j) -> (32*i + j)> + +// Just test that the layout propagation pass runs, even though no layout +// conversion ops are inserted. +// CHECK-LABEL: elementwise_sum +func.func @elementwise_sum(%arg0: !stensor, %arg1: !stensor) -> !stensor { + %0 = secret.generic ins(%arg0, %arg1: !stensor, !stensor) { + ^body(%pt_arg0: tensor<32x32xi16>, %pt_arg1: tensor<32x32xi16>): + %3 = arith.addi %pt_arg0, %pt_arg1: tensor<32x32xi16> + secret.yield %3 : tensor<32x32xi16> + } -> !stensor + return %0 : !stensor +} diff --git a/tests/Transforms/layout_propagation/insert_conversion.mlir b/tests/Transforms/layout_propagation/insert_conversion.mlir new file mode 100644 index 000000000..cb6e9f8df --- /dev/null +++ b/tests/Transforms/layout_propagation/insert_conversion.mlir @@ -0,0 +1,64 @@ +// RUN: heir-opt --layout-propagation %s | FileCheck %s + +!tensor = tensor<32x32xi16> +!tensor2 = tensor<32xi16> +!stensor = !secret.secret +!stensor2 = !secret.secret + +// Test that when an operation changes the tensor layout in an incompatible way,
+ +// CHECK: [[input_map:#[^ ]*]] = affine_map<(d0, d1) -> (d0 * 32 + d1)> +// CHECK: [[row_reduced_map:#[^ ]*]] = affine_map<(d0) -> (d0)> +// CHECK: [[col_reduced_map:#[^ ]*]] = affine_map<(d0) -> (d0 * 32)> + +// CHECK: insert_conversion +// CHECK-SAME: %[[arg0:[^:]+]]: !secret.secret> {tensor_ext.layout = #tensor_ext.layout (d0 * 32 + d1)>} +// CHECK-SAME: %[[arg1:[^:]+]]: !secret.secret> {tensor_ext.layout = #tensor_ext.layout (d0 * 32 + d1)>} +func.func @insert_conversion(%arg0: !stensor, %arg1: !stensor) -> !stensor2 { + // CHECK: [[init0:%.*]] = arith.constant dense<0> + // CHECK: [[init1:%.*]] = arith.constant dense<0> + %out_1 = arith.constant dense<0> : !tensor2 + %out_2 = arith.constant dense<0> : !tensor2 + + // CHECK: secret.generic + // CHECK-SAME: ins(%[[arg0]], %[[arg1]] + // CHECK-SAME: [[arg0]] = {layout = [[input_map]]} + // CHECK-SAME: [[arg1]] = {layout = [[input_map]]} + // Note this one denotes the layout of the result of the generic op + // CHECK-SAME: layout = [ + // CHECK-SAME: [[row_reduced_map]]] + %0 = secret.generic ins(%arg0, %arg1: !stensor, !stensor) { + ^body(%pt_arg0: !tensor, %pt_arg1: !tensor): + // CHECK: tensor_ext.assign_layout [[init0]] {layout = [[row_reduced_map]]} + + // result of sum has row-major layout, i.e., with implicit repetition at the end + // (1, 2, ..., 32, 1, 2, ..., 32, ...) + // CHECK: [[unconverted:[^ ]+]] = linalg.reduce + // CHECK-SAME: {layout = [[[row_reduced_map]]]} + %1 = linalg.reduce { arith.addi } ins(%pt_arg0:!tensor) outs(%out_1:!tensor2) dimensions = [0] + + // CHECK: tensor_ext.assign_layout [[init1]] + // CHECK-SAME: layout = [[row_reduced_map]] + // CHECK: tensor_ext.convert_layout + // CHECK-SAME: from_layout = [[row_reduced_map]] + // CHECK-SAME: to_layout = [[col_reduced_map]] + + // result of sum has column-major layout, i.e., strided + // (1, x, ..., x, 2, x, ..., x, 3, x, ..., x, ...)
+ // At this stage, layout inference would annotate this with #strided attr + // CHECK: [[to_convert:%.+]] = linalg.reduce + // CHECK-SAME: {layout = [[[col_reduced_map]]]} + %2 = linalg.reduce { arith.addi } ins(%pt_arg1:!tensor) outs(%out_2:!tensor2) dimensions = [1] + + // CHECK: [[converted:%.+]] = tensor_ext.convert_layout [[to_convert]] + // CHECK-SAME: from_layout = [[col_reduced_map]] + // CHECK-SAME: layout = [ + // CHECK-SAME: [[row_reduced_map]]] + // CHECK-SAME: to_layout = [[row_reduced_map]] + // CHECK: arith.addi [[unconverted]], [[converted]] + %3 = arith.addi %1, %2 : !tensor2 + secret.yield %3 : !tensor2 + } -> !stensor2 + return %0 : !stensor2 +} diff --git a/tools/BUILD b/tools/BUILD index e5bc74863..5225503ce 100644 --- a/tools/BUILD +++ b/tools/BUILD @@ -101,6 +101,7 @@ cc_binary( "@heir//lib/Transforms/ForwardInsertToExtract", "@heir//lib/Transforms/ForwardStoreToLoad", "@heir//lib/Transforms/FullLoopUnroll", + "@heir//lib/Transforms/LayoutPropagation", "@heir//lib/Transforms/LinalgCanonicalizations", "@heir//lib/Transforms/MemrefToArith:ExpandCopy", "@heir//lib/Transforms/MemrefToArith:MemrefToArithRegistration", diff --git a/tools/heir-opt.cpp b/tools/heir-opt.cpp index 50c55c0ea..49831eddc 100644 --- a/tools/heir-opt.cpp +++ b/tools/heir-opt.cpp @@ -63,6 +63,7 @@ #include "lib/Transforms/ForwardInsertToExtract/ForwardInsertToExtract.h" #include "lib/Transforms/ForwardStoreToLoad/ForwardStoreToLoad.h" #include "lib/Transforms/FullLoopUnroll/FullLoopUnroll.h" +#include "lib/Transforms/LayoutPropagation/LayoutPropagation.h" #include "lib/Transforms/LinalgCanonicalizations/LinalgCanonicalizations.h" #include "lib/Transforms/OperationBalancer/OperationBalancer.h" #include "lib/Transforms/OptimizeRelinearization/OptimizeRelinearization.h" @@ -276,6 +277,7 @@ int main(int argc, char **argv) { registerStraightLineVectorizerPasses(); registerUnusedMemRefPasses(); registerOptimizeRelinearizationPasses(); + registerLayoutPropagationPasses(); 
registerLinalgCanonicalizationsPasses(); registerTensorToScalarsPasses(); // Register yosys optimizer pipeline if configured.