From 2d3bd370eb51d4ccc95be7d7c28695d7be85a0fb Mon Sep 17 00:00:00 2001
From: Ian Wood
Date: Fri, 15 Nov 2024 13:04:26 -0800
Subject: [PATCH 1/7] Check if type is int or float

Signed-off-by: Ian Wood
---
 .../Codegen/LLVMCPU/KernelDispatch.cpp        | 22 ++++++++++---------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp
index bd3c1f4614ac..d3d3ec6cf0be 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp
@@ -2922,16 +2922,18 @@ setLoweringConfigForComputeOps(mlir::FunctionOpInterface entryPointFn,
   // loads and stores will have a performance impact.
   auto resultTypes = rootOperation->getResultTypes();
   if (commonVecTileSizes.size() != 0 && !resultTypes.empty()) {
-    auto elementTypeSize =
-        cast<ShapedType>(rootOperation->getResultTypes().front())
-            .getElementType()
-            .getIntOrFloatBitWidth();
-    // for now just enable for i1
-    if (elementTypeSize == 1) {
-      auto innermostTileSize = commonVecTileSizes.back();
-      commonVecTileSizes.back() =
-          llvm::alignTo(innermostTileSize * elementTypeSize, 8) /
-          elementTypeSize;
+    auto resultType = cast<ShapedType>(rootOperation->getResultTypes().front())
+                          .getElementType();
+    if (resultType.isIntOrFloat()) {
+      auto elementTypeSize = resultType.getIntOrFloatBitWidth();
+
+      // for now just enable for i1
+      if (elementTypeSize == 1) {
+        auto innermostTileSize = commonVecTileSizes.back();
+        commonVecTileSizes.back() =
+            llvm::alignTo(innermostTileSize * elementTypeSize, 8) /
+            elementTypeSize;
+      }
     }
   }

From 55dc55ba029c63133f2bb400ead4a73d152cfadc Mon Sep 17 00:00:00 2001
From: Ian Wood
Date: Mon, 18 Nov 2024 12:59:49 -0800
Subject: [PATCH 2/7] Revert "Check if type is int or float"

This reverts commit 2d3bd370eb51d4ccc95be7d7c28695d7be85a0fb.

Signed-off-by: Ian Wood
---
 .../Codegen/LLVMCPU/KernelDispatch.cpp        | 22 +++++++++----------
 1 file changed, 10 insertions(+), 12 deletions(-)

diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp
index d3d3ec6cf0be..bd3c1f4614ac 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp
@@ -2922,18 +2922,16 @@ setLoweringConfigForComputeOps(mlir::FunctionOpInterface entryPointFn,
   // loads and stores will have a performance impact.
   auto resultTypes = rootOperation->getResultTypes();
   if (commonVecTileSizes.size() != 0 && !resultTypes.empty()) {
-    auto resultType = cast<ShapedType>(rootOperation->getResultTypes().front())
-                          .getElementType();
-    if (resultType.isIntOrFloat()) {
-      auto elementTypeSize = resultType.getIntOrFloatBitWidth();
-
-      // for now just enable for i1
-      if (elementTypeSize == 1) {
-        auto innermostTileSize = commonVecTileSizes.back();
-        commonVecTileSizes.back() =
-            llvm::alignTo(innermostTileSize * elementTypeSize, 8) /
-            elementTypeSize;
-      }
+    auto elementTypeSize =
+        cast<ShapedType>(rootOperation->getResultTypes().front())
+            .getElementType()
+            .getIntOrFloatBitWidth();
+    // for now just enable for i1
+    if (elementTypeSize == 1) {
+      auto innermostTileSize = commonVecTileSizes.back();
+      commonVecTileSizes.back() =
+          llvm::alignTo(innermostTileSize * elementTypeSize, 8) /
+          elementTypeSize;
     }
   }

From 88dd7af6ff3899d47b4ad5137acf88d63c7dded6 Mon Sep 17 00:00:00 2001
From: giacs-epic <179146510+giacs-epic@users.noreply.github.com>
Date: Mon, 18 Nov 2024 16:17:05 +0000
Subject: [PATCH 3/7] Fix crash due to call to Type::getIntOrFloatBitWidth()
 asserting on complex numbers

Signed-off-by: Ian Wood
---
 .../compiler/Codegen/LLVMCPU/KernelDispatch.cpp | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp
index bd3c1f4614ac..e394844dd5df 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp
@@ -2922,10 +2922,15 @@ setLoweringConfigForComputeOps(mlir::FunctionOpInterface entryPointFn,
   // loads and stores will have a performance impact.
   auto resultTypes = rootOperation->getResultTypes();
   if (commonVecTileSizes.size() != 0 && !resultTypes.empty()) {
-    auto elementTypeSize =
-        cast<ShapedType>(rootOperation->getResultTypes().front())
-            .getElementType()
-            .getIntOrFloatBitWidth();
+    Type elementType = cast<ShapedType>(rootOperation->getResultTypes().front())
+                           .getElementType();
+    unsigned int elementTypeSize;
+    if (auto complexType = llvm::dyn_cast<ComplexType>(elementType)) {
+      elementTypeSize =
+          2 * complexType.getElementType().getIntOrFloatBitWidth();
+    } else {
+      elementTypeSize = elementType.getIntOrFloatBitWidth();
+    }
     // for now just enable for i1
     if (elementTypeSize == 1) {
       auto innermostTileSize = commonVecTileSizes.back();

From 8d95db220361d8db0888af07e0ae98034c8f36ce Mon Sep 17 00:00:00 2001
From: Ian Wood
Date: Mon, 18 Nov 2024 13:41:12 -0800
Subject: [PATCH 4/7] Add select lowering strat test

Signed-off-by: Ian Wood
---
 .../test/select_x86_64_lowering_strategy.mlir | 44 +++++++++++++++++++
 1 file changed, 44 insertions(+)

diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_x86_64_lowering_strategy.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_x86_64_lowering_strategy.mlir
index 9161c810aa23..c2b5d84b6435 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_x86_64_lowering_strategy.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_x86_64_lowering_strategy.mlir
@@ -1983,3 +1983,47 @@ func.func @i1_type() attributes {hal.executable.target = #executable_target_emb
 // CHECK: func @i1_type()
 // CHECK: linalg.generic {
 // CHECK-SAME: {lowering_config = #[[CONFIG]]}
+
+// -----
+
+#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "+avx512f", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-linux-gnu"}>
+#map = affine_map<(d0, d1, d2) -> (d1)>
+#map1 = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
+func.func @complex_view_as_real() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} {
+  %c1 = arith.constant 1 : index
+  %c0 = arith.constant 0 : index
+  %0 = hal.interface.binding.subspan layout(, #hal.pipeline.binding, #hal.pipeline.binding], flags = Indirect>) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor>
+  %1 = hal.interface.binding.subspan layout(, #hal.pipeline.binding, #hal.pipeline.binding], flags = Indirect>) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor>>
+  %2 = hal.interface.binding.subspan layout(, #hal.pipeline.binding, #hal.pipeline.binding], flags = Indirect>) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor>
+  %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor>
+  %4 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [1], strides = [1] : !flow.dispatch.tensor> -> tensor<1xi32>
+  %5 = flow.dispatch.tensor.load %2, offsets = [0, 0, 0, 0, 0], sizes = [1, 1, 32, 50, 2], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<1x1x32x50x2xf32>
+  %6 = tensor.empty() : tensor<32x50x2xf32>
+  %extracted = tensor.extract %4[%c0] : tensor<1xi32>
+  %7 = arith.extsi %extracted : i32 to i64
+  %8 = arith.index_cast %7 : i64 to index
+  %9 = flow.dispatch.tensor.load %1, offsets = [%8, 0], sizes = [1, 50], strides = [1, 1] : !flow.dispatch.tensor>> -> tensor<50xcomplex<f32>>
+  %10 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel"]} ins(%9 : tensor<50xcomplex<f32>>) outs(%6 : tensor<32x50x2xf32>) {
+  ^bb0(%in: complex<f32>, %out: f32):
+    %11 = linalg.index 0 : index
+    %12 = linalg.index 1 : index
+    %extracted_0 = tensor.extract %5[%c0, %c0, %11, %12, %c0] : tensor<1x1x32x50x2xf32>
+    %extracted_1 = tensor.extract %5[%c0, %c0, %11, %12, %c1] : tensor<1x1x32x50x2xf32>
+    %13 = complex.create %extracted_0, %extracted_1 : complex<f32>
+    %14 = complex.mul %13, %in : complex<f32>
+    %15 = complex.re %14 : complex<f32>
+    %16 = complex.im %14 : complex<f32>
+    %17 = linalg.index 2 : index
+    %18 = arith.cmpi eq, %17, %c0 : index
+    %19 = arith.select %18, %15, %16 : f32
+    linalg.yield %19 : f32
+  } -> tensor<32x50x2xf32>
+  flow.dispatch.tensor.store %10, %3, offsets = [0, 0, 0], sizes = [32, 50, 2], strides = [1, 1, 1] : tensor<32x50x2xf32> -> !flow.dispatch.tensor>
+  return
+}
+
+// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering_config
+// CHECK: func.func @complex_view_as_real()
+// CHECK: linalg.generic
+// CHECK-SAME: lowering_config = #[[CONFIG]]
+

From 965fd0b2fd8a46977c05995b137b8d5362b63293 Mon Sep 17 00:00:00 2001
From: Ian Wood
Date: Mon, 18 Nov 2024 13:53:33 -0800
Subject: [PATCH 5/7] Address comments

Signed-off-by: Ian Wood
---
 compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp | 3 +--
 .../Codegen/LLVMCPU/test/select_x86_64_lowering_strategy.mlir | 1 -
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp
index e394844dd5df..77b99a902cc9 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp
@@ -2922,8 +2922,7 @@ setLoweringConfigForComputeOps(mlir::FunctionOpInterface entryPointFn,
   // loads and stores will have a performance impact.
   auto resultTypes = rootOperation->getResultTypes();
   if (commonVecTileSizes.size() != 0 && !resultTypes.empty()) {
-    Type elementType = cast<ShapedType>(rootOperation->getResultTypes().front())
-                           .getElementType();
+    Type elementType = cast<ShapedType>(resultTypes[0]).getElementType();
     unsigned int elementTypeSize;
     if (auto complexType = llvm::dyn_cast<ComplexType>(elementType)) {
       elementTypeSize =
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_x86_64_lowering_strategy.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_x86_64_lowering_strategy.mlir
index c2b5d84b6435..cfaed32bd62e 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_x86_64_lowering_strategy.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_x86_64_lowering_strategy.mlir
@@ -2026,4 +2026,3 @@ func.func @complex_view_as_real() attributes {hal.executable.target = #executabl
 // CHECK: func.func @complex_view_as_real()
 // CHECK: linalg.generic
 // CHECK-SAME: lowering_config = #[[CONFIG]]
-

From d59dbafcdae24d50f985454eb2df42a01aecc83a Mon Sep 17 00:00:00 2001
From: Ian Wood
Date: Mon, 18 Nov 2024 14:25:46 -0800
Subject: [PATCH 6/7] Change ordinal num

Signed-off-by: Ian Wood
---
 .../Codegen/LLVMCPU/test/select_x86_64_lowering_strategy.mlir | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_x86_64_lowering_strategy.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_x86_64_lowering_strategy.mlir
index cfaed32bd62e..cb1b9effc8a7 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_x86_64_lowering_strategy.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_x86_64_lowering_strategy.mlir
@@ -1995,7 +1995,7 @@ func.func @complex_view_as_real() attributes {hal.executable.target = #executabl
   %0 = hal.interface.binding.subspan layout(, #hal.pipeline.binding, #hal.pipeline.binding], flags = Indirect>) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor>
   %1 = hal.interface.binding.subspan layout(, #hal.pipeline.binding, #hal.pipeline.binding], flags = Indirect>) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor>>
   %2 = hal.interface.binding.subspan layout(, #hal.pipeline.binding, #hal.pipeline.binding], flags = Indirect>) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor>
-  %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor>
+  %3 = hal.interface.binding.subspan layout(, #hal.pipeline.binding, #hal.pipeline.binding], flags = Indirect>) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor>
   %4 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [1], strides = [1] : !flow.dispatch.tensor> -> tensor<1xi32>
   %5 = flow.dispatch.tensor.load %2, offsets = [0, 0, 0, 0, 0], sizes = [1, 1, 32, 50, 2], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<1x1x32x50x2xf32>
   %6 = tensor.empty() : tensor<32x50x2xf32>

From 0e0610f129416fedabddf9761fa7588d55d11c8a Mon Sep 17 00:00:00 2001
From: Ian Wood
Date: Tue, 19 Nov 2024 03:13:46 -0800
Subject: [PATCH 7/7] Hoist pipeline layout and fix ordinal

Signed-off-by: Ian Wood
---
 .../test/select_x86_64_lowering_strategy.mlir | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_x86_64_lowering_strategy.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_x86_64_lowering_strategy.mlir
index cb1b9effc8a7..22a288062bc2 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_x86_64_lowering_strategy.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_x86_64_lowering_strategy.mlir
@@ -1985,6 +1985,12 @@ func.func @i1_type() attributes {hal.executable.target = #executable_target_emb
 // CHECK-SAME: {lowering_config = #[[CONFIG]]}

 // -----

+#pipeline_layout = #hal.pipeline.layout,
+  #hal.pipeline.binding,
+  #hal.pipeline.binding,
+  #hal.pipeline.binding
+]>
 #executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "+avx512f", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-linux-gnu"}>
 #map = affine_map<(d0, d1, d2) -> (d1)>
@@ -1992,10 +1998,10 @@ func.func @complex_view_as_real() attributes {hal.executable.target = #executabl
 func.func @complex_view_as_real() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} {
   %c1 = arith.constant 1 : index
   %c0 = arith.constant 0 : index
-  %0 = hal.interface.binding.subspan layout(, #hal.pipeline.binding, #hal.pipeline.binding], flags = Indirect>) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor>
-  %1 = hal.interface.binding.subspan layout(, #hal.pipeline.binding, #hal.pipeline.binding], flags = Indirect>) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor>>
-  %2 = hal.interface.binding.subspan layout(, #hal.pipeline.binding, #hal.pipeline.binding], flags = Indirect>) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor>
-  %3 = hal.interface.binding.subspan layout(, #hal.pipeline.binding, #hal.pipeline.binding], flags = Indirect>) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor>
+  %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor>
+  %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor>>
+  %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor>
+  %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor>
   %4 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [1], strides = [1] : !flow.dispatch.tensor> -> tensor<1xi32>
   %5 = flow.dispatch.tensor.load %2, offsets = [0, 0, 0, 0, 0], sizes = [1, 1, 32, 50, 2], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<1x1x32x50x2xf32>
   %6 = tensor.empty() : tensor<32x50x2xf32>