Skip to content

Commit

Permalink
Check isIntOrFloat before querying bitwidth (#19172)
Browse files Browse the repository at this point in the history
The element type must be an int or float before
`getIntOrFloatBitWidth` can be called on it. This adds a check for whether
the element type is an int or float; otherwise, the innermost tile size is
not adjusted. The issue was introduced by commit
c80fa3b.

1/2 fix for #19167.

---------

Signed-off-by: Ian Wood <ianwood2024@u.northwestern.edu>
Co-authored-by: giacs-epic <179146510+giacs-epic@users.noreply.github.com>
  • Loading branch information
IanWood1 and giacs-epic authored Nov 19, 2024
1 parent 540cebf commit b68c535
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 4 deletions.
12 changes: 8 additions & 4 deletions compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2922,10 +2922,14 @@ setLoweringConfigForComputeOps(mlir::FunctionOpInterface entryPointFn,
// loads and stores will have a performance impact.
auto resultTypes = rootOperation->getResultTypes();
if (commonVecTileSizes.size() != 0 && !resultTypes.empty()) {
auto elementTypeSize =
cast<ShapedType>(rootOperation->getResultTypes().front())
.getElementType()
.getIntOrFloatBitWidth();
Type elementType = cast<ShapedType>(resultTypes[0]).getElementType();
unsigned int elementTypeSize;
if (auto complexType = llvm::dyn_cast<ComplexType>(elementType)) {
elementTypeSize =
2 * complexType.getElementType().getIntOrFloatBitWidth();
} else {
elementTypeSize = elementType.getIntOrFloatBitWidth();
}
// for now just enable for i1
if (elementTypeSize == 1) {
auto innermostTileSize = commonVecTileSizes.back();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1983,3 +1983,52 @@ func.func @i1_type() attributes {hal.executable.target = #executable_target_emb
// CHECK: func @i1_type()
// CHECK: linalg.generic {
// CHECK-SAME: {lowering_config = #[[CONFIG]]}

// -----
// Pipeline layout with four storage-buffer bindings (indices 0-3).
#pipeline_layout = #hal.pipeline.layout<bindings = [
#hal.pipeline.binding<storage_buffer>,
#hal.pipeline.binding<storage_buffer>,
#hal.pipeline.binding<storage_buffer>,
#hal.pipeline.binding<storage_buffer>
]>

// Executable target: llvm-cpu, embedded ELF for x86_64 with the +avx512f
// feature and native_vector_size = 16.
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "+avx512f", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-linux-gnu"}>
// #map selects only d1 of the 3-D iteration space; #map1 is the 3-D identity.
#map = affine_map<(d0, d1, d2) -> (d1)>
#map1 = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
// Test input: a dispatch whose linalg.generic consumes a tensor of
// complex<f32> and produces a tensor<32x50x2xf32>.
func.func @complex_view_as_real() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} {
%c1 = arith.constant 1 : index
%c0 = arith.constant 0 : index
// Bindings 0-2 are read-only inputs; binding 3 is the write-only output.
%0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1xi32>>
%1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2048x50xcomplex<f32>>>
%2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<1x1x32x50x2xf32>>
%3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<32x50x2xf32>>
// Load the single i32 from binding 0 and the whole f32 tensor from binding 2.
%4 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [1], strides = [1] : !flow.dispatch.tensor<readonly:tensor<1xi32>> -> tensor<1xi32>
%5 = flow.dispatch.tensor.load %2, offsets = [0, 0, 0, 0, 0], sizes = [1, 1, 32, 50, 2], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x32x50x2xf32>> -> tensor<1x1x32x50x2xf32>
%6 = tensor.empty() : tensor<32x50x2xf32>
// Convert the loaded i32 to an index; it is used below as the dynamic row
// offset into the 2048x50 complex tensor.
%extracted = tensor.extract %4[%c0] : tensor<1xi32>
%7 = arith.extsi %extracted : i32 to i64
%8 = arith.index_cast %7 : i64 to index
// Load one row (50 complex<f32> elements) starting at row %8.
%9 = flow.dispatch.tensor.load %1, offsets = [%8, 0], sizes = [1, 50], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2048x50xcomplex<f32>>> -> tensor<50xcomplex<f32>>
// Fully parallel 3-D generic: the complex input is indexed by d1 only (#map),
// the f32 output by (d0, d1, d2) (#map1).
%10 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel"]} ins(%9 : tensor<50xcomplex<f32>>) outs(%6 : tensor<32x50x2xf32>) {
^bb0(%in: complex<f32>, %out: f32):
%11 = linalg.index 0 : index
%12 = linalg.index 1 : index
// Build a complex value from the two f32 entries along the last dimension of
// %5 at position (d0, d1), then multiply it by the complex input element.
%extracted_0 = tensor.extract %5[%c0, %c0, %11, %12, %c0] : tensor<1x1x32x50x2xf32>
%extracted_1 = tensor.extract %5[%c0, %c0, %11, %12, %c1] : tensor<1x1x32x50x2xf32>
%13 = complex.create %extracted_0, %extracted_1 : complex<f32>
%14 = complex.mul %13, %in : complex<f32>
%15 = complex.re %14 : complex<f32>
%16 = complex.im %14 : complex<f32>
// Yield the real part where d2 == 0 and the imaginary part otherwise.
%17 = linalg.index 2 : index
%18 = arith.cmpi eq, %17, %c0 : index
%19 = arith.select %18, %15, %16 : f32
linalg.yield %19 : f32
} -> tensor<32x50x2xf32>
// Store the full 32x50x2 result to the output binding.
flow.dispatch.tensor.store %10, %3, offsets = [0, 0, 0], sizes = [32, 50, 2], strides = [1, 1, 1] : tensor<32x50x2xf32> -> !flow.dispatch.tensor<writeonly:tensor<32x50x2xf32>>
return
}

// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[4, 25, 2], [1, 1, 2], [0, 0, 0], [0, 0, 0]{{\]}}>
// CHECK: func.func @complex_view_as_real()
// CHECK: linalg.generic
// CHECK-SAME: lowering_config = #[[CONFIG]]

0 comments on commit b68c535

Please sign in to comment.