-
Notifications
You must be signed in to change notification settings - Fork 641
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[Codegen][GPU] Add a pass for basic distribution verification (#18236)
After various levels of tiling to warps/threads and then bufferization in GPU codegen pipelines, but before resolving distributed loops like `scf.forall` ops, we have an opportunity for additional verification that all operations were properly mapped to threads. In particular, any operation that vectorized/bufferized to an operation with a write effect must now be within a *thread* distributed context or else there is almost certainly a write race. Such cases mean something went wrong in earlier passes and are a compiler failure. Note: this is only added for the LLVMGPUTileAndFuse pipeline because other pipelines allow for write-effecting ops like `memref.copy` to persist past `scf.forall` resolution.
- Loading branch information
Showing
8 changed files
with
130 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
91 changes: 91 additions & 0 deletions
91
compiler/src/iree/compiler/Codegen/Common/GPU/GPUVerifyDistribution.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
// Copyright 2024 The IREE Authors | ||
// | ||
// Licensed under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
|
||
#include "iree/compiler/Codegen/Common/GPU/Passes.h" | ||
#include "iree/compiler/Codegen/Dialect/Codegen/IR/IREECodegenAttrs.h" | ||
#include "iree/compiler/Codegen/Dialect/GPU/IR/IREEGPUAttrs.h" | ||
#include "mlir/Dialect/GPU/IR/GPUDialect.h" | ||
#include "mlir/IR/Visitors.h" | ||
#include "mlir/Interfaces/FunctionInterfaces.h" | ||
#include "mlir/Interfaces/SideEffectInterfaces.h" | ||
|
||
namespace mlir::iree_compiler { | ||
|
||
#define GEN_PASS_DEF_GPUVERIFYDISTRIBUTIONPASS | ||
#include "iree/compiler/Codegen/Common/GPU/Passes.h.inc" | ||
|
||
namespace { | ||
|
||
template <typename... Type> | ||
bool forallOpHasMappingType(scf::ForallOp forallOp) { | ||
std::optional<ArrayAttr> mapping = forallOp.getMapping(); | ||
if (!mapping || mapping.value().empty()) { | ||
return false; | ||
} | ||
|
||
return isa<Type...>(*mapping.value().begin()); | ||
} | ||
|
||
template <typename... Type> | ||
bool operationHasParentForallOfMappingType(Operation *op) { | ||
auto parentForallOp = op->getParentOfType<scf::ForallOp>(); | ||
while (parentForallOp) { | ||
if (forallOpHasMappingType<Type...>(parentForallOp)) { | ||
return true; | ||
} | ||
parentForallOp = parentForallOp->getParentOfType<scf::ForallOp>(); | ||
} | ||
return false; | ||
} | ||
|
||
/// Pass to verify that writes only happen in distributed contexts. Code in | ||
/// shared contexts are executed uniformly across all threads after resolution | ||
/// of distributed contexts (i.e. scf.forall), thus operations with write | ||
/// memory effects are inherently | ||
struct GPUVerifyDistributionPass final | ||
: impl::GPUVerifyDistributionPassBase<GPUVerifyDistributionPass> { | ||
|
||
void runOnOperation() override { | ||
FunctionOpInterface funcOp = getOperation(); | ||
|
||
WalkResult res = funcOp.walk([](Operation *op) { | ||
if (auto forallOp = dyn_cast<scf::ForallOp>(op)) { | ||
std::optional<ArrayAttr> mapping = forallOp.getMapping(); | ||
if (!mapping || mapping.value().empty()) { | ||
forallOp->emitOpError("requires a mapping attribute."); | ||
return WalkResult::interrupt(); | ||
} | ||
|
||
if (isa<IREE::GPU::LaneIdAttr>(*mapping.value().begin()) && | ||
!operationHasParentForallOfMappingType< | ||
mlir::gpu::GPUWarpMappingAttr>(forallOp)) { | ||
forallOp->emitOpError("lane distributed scf.forall must have a " | ||
"parent subgroup distributed loop."); | ||
return WalkResult::interrupt(); | ||
} | ||
return WalkResult::advance(); | ||
} | ||
if (auto memoryEffectOp = dyn_cast<MemoryEffectOpInterface>(op)) { | ||
if (memoryEffectOp.hasEffect<MemoryEffects::Write>() && | ||
!operationHasParentForallOfMappingType< | ||
mlir::gpu::GPUThreadMappingAttr, IREE::GPU::LaneIdAttr>(op)) { | ||
op->emitOpError("write affecting operations are restricted to lane " | ||
"or thread distributed contexts."); | ||
return WalkResult::interrupt(); | ||
} | ||
} | ||
return WalkResult::advance(); | ||
}); | ||
|
||
if (res.wasInterrupted()) { | ||
return signalPassFailure(); | ||
} | ||
} | ||
}; | ||
|
||
} // namespace | ||
|
||
} // namespace mlir::iree_compiler |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
29 changes: 29 additions & 0 deletions
29
compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_verify_distribution.mlir
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
// RUN: iree-opt %s --split-input-file --verify-diagnostics \ | ||
// RUN: --pass-pipeline="builtin.module(func.func(iree-codegen-gpu-verify-distribution))" | ||
|
||
// An scf.forall that carries no mapping attribute must be rejected.
func.func @unmapped_forall(%out : memref<32xi32>) {
  // expected-error @+1 {{requires a mapping attribute}}
  scf.forall (%i) in (32) {
  }
  return
}
|
||
// ----- | ||
|
||
// A store nested only inside a warp-mapped scf.forall (no thread or lane
// mapped parent) must be rejected.
func.func @write_in_warp_forall(%out : memref<32xi32>) {
  %zero = arith.constant 0 : i32
  scf.forall (%warp) in (32) {
    // expected-error@+1 {{write affecting operations are restricted to lane or thread distributed contexts}}
    memref.store %zero, %out[%warp] : memref<32xi32>
  } {mapping = [#gpu.warp<x>]}
  return
}
|
||
// ----- | ||
|
||
// A lane-mapped scf.forall with no enclosing warp-mapped scf.forall must be
// rejected.
func.func @lane_forall_no_warp_parent(%out : memref<32xi32>) {
  // expected-error@+1 {{lane distributed scf.forall must have a parent subgroup distributed loop}}
  scf.forall (%lane) in (32) {
  } {mapping = [#iree_gpu.lane_id<0>]}
  return
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters