# Patch summary (annotation only: this text sits before the first
# "diff --git" header and is not part of any hunk).
#
# tests/e2e/linalg_ext_ops/BUILD.bazel
#   * Excludes attention_i1_mask.mlir from ALL_SRCS, VMVX_SRCS, LLVM_GPU_SRCS
#     and ROCM_HIP_SRCS, and from two exclude lists nested inside
#     iree_check_single_backend_test_suite calls.
#   * Adds the suite check_llvm-cpu_local-task_i1, whose only source is
#     attention_i1_mask.mlir, compiled with --iree-llvmcpu-target-cpu=generic
#     and --iree-experimental-packed-i1-storage and tagged "nowasm".
#
# tests/e2e/linalg_ext_ops/CMakeLists.txt
#   * Adds the same suite with the same source, flags and "nowasm" label.
#
# tests/e2e/linalg_ext_ops/attention.mlir
#   * Adds @attention1x4x4_i1_mask_all_ones, whose mask is a dense all-true
#     tensor<1x4x4xi1> constant.
#
# tests/e2e/linalg_ext_ops/attention_i1_mask.mlir (new file)
#   * Three attention tests whose mask is a tensor<2xi8> constant bitcast to
#     tensor<1x4x4xi1>; the byte pairs are [165, 165], [255, 255], [140, 239].
#   * The [255, 255] test expects the same values as the dense all-true test
#     added to attention.mlir.
#   * NOTE(review): the expected values for [140, 239] look consistent with
#     least-significant-bit-first unpacking (output row 1 equals value row 3,
#     output row 2 equals row 2 of the all-ones result), i.e. mask rows
#     [F,F,T,T] [F,F,F,T] [T,T,T,T] [F,T,T,T]. That is not a lower-triangular
#     pattern -- confirm the intent behind the "_tril" name.
#
# tests/e2e/subbyte_types/subbyte_types.mlir
#   * i1_representation, _2, _3: bitcast i8 -> i1, pass the value through
#     util.optimization_barrier, bitcast back to i8 and expect the original
#     bytes.
#   * truncate_i1, truncate_i1_2: a linalg.generic compares each i8 element
#     with zero (arith.cmpi sgt) to produce i1, the result is bitcast to i8
#     and compared with 195 and [60, 195] respectively.
#
diff --git a/tests/e2e/linalg_ext_ops/BUILD.bazel b/tests/e2e/linalg_ext_ops/BUILD.bazel
index f437e9d4d4b8..acc14d1c61f9 100644
--- a/tests/e2e/linalg_ext_ops/BUILD.bazel
+++ b/tests/e2e/linalg_ext_ops/BUILD.bazel
@@ -24,6 +24,9 @@ ALL_SRCS = enforce_glob(
         "winograd_output.mlir",
     ],
     include = ["*.mlir"],
+    exclude = [
+        "attention_i1_mask.mlir",
+    ],
 )
 
 iree_check_single_backend_test_suite(
@@ -39,6 +42,24 @@ iree_check_single_backend_test_suite(
     target_backend = "llvm-cpu",
 )
 
+iree_check_single_backend_test_suite(
+    name = "check_llvm-cpu_local-task_i1",
+    srcs = [
+        "attention_i1_mask.mlir",
+    ],
+    compiler_flags = [
+        "--iree-llvmcpu-target-cpu=generic",
+        "--iree-experimental-packed-i1-storage",
+    ],
+    driver = "local-task",
+    tags = [
+        # attention fails with a wasm target, just disable the tests there for now
+        # error: Yield operand #2 is not equivalent to the corresponding iter bbArg
+        "nowasm",
+    ],
+    target_backend = "llvm-cpu",
+)
+
 VMVX_SRCS = enforce_glob(
     # keep sorted
     [
@@ -52,6 +73,7 @@ VMVX_SRCS = enforce_glob(
     include = ["*.mlir"],
     exclude = [
         "attention.mlir",
+        "attention_i1_mask.mlir",
     ],
 )
 
@@ -75,6 +97,7 @@ LLVM_GPU_SRCS = enforce_glob(
     include = ["*.mlir"],
     exclude = [
         "attention.mlir",
+        "attention_i1_mask.mlir",
     ],
 )
 
@@ -107,6 +130,7 @@ ROCM_HIP_SRCS = enforce_glob(
     exclude = [
         "top-k.mlir",
         "attention.mlir",
+        "attention_i1_mask.mlir",
     ],
 )
 
@@ -131,6 +155,7 @@ iree_check_single_backend_test_suite(
         include = ["*.mlir"],
         exclude = [
             "attention.mlir",
+            "attention_i1_mask.mlir",
             "top-k.mlir",
         ],
     ),
@@ -152,6 +177,7 @@ iree_check_single_backend_test_suite(
         include = ["*.mlir"],
         exclude = [
             "attention.mlir",
+            "attention_i1_mask.mlir",
             "top-k.mlir",
         ],
     ),
diff --git a/tests/e2e/linalg_ext_ops/CMakeLists.txt b/tests/e2e/linalg_ext_ops/CMakeLists.txt
index 5bc968c6e9b7..37377670dd20 100644
--- a/tests/e2e/linalg_ext_ops/CMakeLists.txt
+++ b/tests/e2e/linalg_ext_ops/CMakeLists.txt
@@ -31,6 +31,22 @@ iree_check_single_backend_test_suite(
     "nowasm"
 )
 
+iree_check_single_backend_test_suite(
+  NAME
+    check_llvm-cpu_local-task_i1
+  SRCS
+    "attention_i1_mask.mlir"
+  TARGET_BACKEND
+    "llvm-cpu"
+  DRIVER
+    "local-task"
+  COMPILER_FLAGS
+    "--iree-llvmcpu-target-cpu=generic"
+    "--iree-experimental-packed-i1-storage"
+  LABELS
+    "nowasm"
+)
+
 iree_check_single_backend_test_suite(
   NAME
     check_vmvx_local-task
diff --git a/tests/e2e/linalg_ext_ops/attention.mlir b/tests/e2e/linalg_ext_ops/attention.mlir
index a874b230a7a5..693092b16e63 100644
--- a/tests/e2e/linalg_ext_ops/attention.mlir
+++ b/tests/e2e/linalg_ext_ops/attention.mlir
@@ -66,6 +66,48 @@ func.func @causal_attention1x3x4() {
   return
 }
 
+func.func @attention1x4x4_i1_mask_all_ones() {
+  %init = tensor.empty() : tensor<1x4x4xf32>
+  %query = util.unfoldable_constant dense<[[[0.1, 0.2, 0.3, 0.4],
+                                            [0.5, 0.6, 0.7, 0.8],
+                                            [0.9, 1.0, 1.1, 1.2],
+                                            [1.3, 1.4, 1.5, 1.6]]]> : tensor<1x4x4xf32>
+
+  %key = util.unfoldable_constant dense<[[[0.1, 0.2, 0.3, 0.4],
+                                          [0.5, 0.6, 0.7, 0.8],
+                                          [0.9, 1.0, 1.1, 1.2],
+                                          [1.3, 1.4, 1.5, 1.6]]]> : tensor<1x4x4xf32>
+  %value = util.unfoldable_constant dense<[[[0.1, 0.2, 0.3, 0.4],
+                                            [0.5, 0.6, 0.7, 0.8],
+                                            [0.9, 1.0, 1.1, 1.2],
+                                            [1.3, 1.4, 1.5, 1.6]]]> : tensor<1x4x4xf32>
+
+  %mask = util.unfoldable_constant dense<[[[true, true, true, true],
+                                           [true, true, true, true],
+                                           [true, true, true, true],
+                                           [true, true, true, true]]]> : tensor<1x4x4xi1>
+
+  %scale = arith.constant 0.5 : f32
+  %1 = iree_linalg_ext.attention {indexing_maps = [affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2)>,
+                     affine_map<(d0, d1, d2, d3, d4) -> (d0, d3, d2)>,
+                     affine_map<(d0, d1, d2, d3, d4) -> (d0, d3, d4)>,
+                     affine_map<(d0, d1, d2, d3, d4) -> ()>,
+                     affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d3)>,
+                     affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d4)>]}
+                     ins(%query, %key, %value, %scale, %mask : tensor<1x4x4xf32>,
+        tensor<1x4x4xf32>, tensor<1x4x4xf32>, f32, tensor<1x4x4xi1>) outs(%init : tensor<1x4x4xf32>) {
+          ^bb0(%arg0: f32):
+          iree_linalg_ext.yield %arg0 : f32
+        } -> tensor<1x4x4xf32>
+  check.expect_almost_eq_const(
+      %1,
+      dense<[[[0.798884, 0.898884, 0.998884, 1.09888],
+              [0.941939, 1.04194, 1.14194, 1.24194],
+              [1.05371, 1.15371, 1.25371, 1.35371],
+              [1.13295, 1.23295, 1.33295, 1.43295]]]> : tensor<1x4x4xf32>
+  ) : tensor<1x4x4xf32>
+  return
+}
 
 func.func @softcap_attention1x3x4() {
   %init = tensor.empty() : tensor<1x3x4xf32>
diff --git a/tests/e2e/linalg_ext_ops/attention_i1_mask.mlir b/tests/e2e/linalg_ext_ops/attention_i1_mask.mlir
new file mode 100644
index 000000000000..e4a4631c1cd0
--- /dev/null
+++ b/tests/e2e/linalg_ext_ops/attention_i1_mask.mlir
@@ -0,0 +1,122 @@
+func.func @attention1x4x4_i1_mask() {
+  %init = tensor.empty() : tensor<1x4x4xf32>
+  %query = util.unfoldable_constant dense<[[[0.1, 0.2, 0.3, 0.4],
+                                            [0.5, 0.6, 0.7, 0.8],
+                                            [0.9, 1.0, 1.1, 1.2],
+                                            [1.3, 1.4, 1.5, 1.6]]]> : tensor<1x4x4xf32>
+
+  %key = util.unfoldable_constant dense<[[[0.1, 0.2, 0.3, 0.4],
+                                          [0.5, 0.6, 0.7, 0.8],
+                                          [0.9, 1.0, 1.1, 1.2],
+                                          [1.3, 1.4, 1.5, 1.6]]]> : tensor<1x4x4xf32>
+  %value = util.unfoldable_constant dense<[[[0.1, 0.2, 0.3, 0.4],
+                                            [0.5, 0.6, 0.7, 0.8],
+                                            [0.9, 1.0, 1.1, 1.2],
+                                            [1.3, 1.4, 1.5, 1.6]]]> : tensor<1x4x4xf32>
+
+  %i8mask = util.unfoldable_constant dense<[165, 165]> : tensor<2xi8>
+  %mask = flow.tensor.bitcast %i8mask : tensor<2xi8> -> tensor<1x4x4xi1>
+
+  %scale = arith.constant 0.5 : f32
+  %1 = iree_linalg_ext.attention {indexing_maps = [affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2)>,
+                     affine_map<(d0, d1, d2, d3, d4) -> (d0, d3, d2)>,
+                     affine_map<(d0, d1, d2, d3, d4) -> (d0, d3, d4)>,
+                     affine_map<(d0, d1, d2, d3, d4) -> ()>,
+                     affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d3)>,
+                     affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d4)>]}
+                     ins(%query, %key, %value, %scale, %mask : tensor<1x4x4xf32>,
+        tensor<1x4x4xf32>, tensor<1x4x4xf32>, f32, tensor<1x4x4xi1>) outs(%init : tensor<1x4x4xf32>) {
+          ^bb0(%arg0: f32):
+          iree_linalg_ext.yield %arg0 : f32
+        } -> tensor<1x4x4xf32>
+  check.expect_almost_eq_const(
+      %1,
+      dense<[[[0.57895, 0.67895, 0.77895, 0.87895],
+              [1.09108, 1.19108, 1.29108, 1.39108],
+              [0.774324, 0.874324, 0.974324, 1.07432],
+              [1.22842, 1.32842, 1.42842, 1.52842]]]> : tensor<1x4x4xf32>
+  ) : tensor<1x4x4xf32>
+  return
+}
+
+func.func @attention1x4x4_i1_mask_all_ones() {
+  %init = tensor.empty() : tensor<1x4x4xf32>
+  %query = util.unfoldable_constant dense<[[[0.1, 0.2, 0.3, 0.4],
+                                            [0.5, 0.6, 0.7, 0.8],
+                                            [0.9, 1.0, 1.1, 1.2],
+                                            [1.3, 1.4, 1.5, 1.6]]]> : tensor<1x4x4xf32>
+
+  %key = util.unfoldable_constant dense<[[[0.1, 0.2, 0.3, 0.4],
+                                          [0.5, 0.6, 0.7, 0.8],
+                                          [0.9, 1.0, 1.1, 1.2],
+                                          [1.3, 1.4, 1.5, 1.6]]]> : tensor<1x4x4xf32>
+  %value = util.unfoldable_constant dense<[[[0.1, 0.2, 0.3, 0.4],
+                                            [0.5, 0.6, 0.7, 0.8],
+                                            [0.9, 1.0, 1.1, 1.2],
+                                            [1.3, 1.4, 1.5, 1.6]]]> : tensor<1x4x4xf32>
+
+  %i8mask = util.unfoldable_constant dense<[255, 255]> : tensor<2xi8>
+  %mask = flow.tensor.bitcast %i8mask : tensor<2xi8> -> tensor<1x4x4xi1>
+
+  %scale = arith.constant 0.5 : f32
+  %1 = iree_linalg_ext.attention {indexing_maps = [affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2)>,
+                     affine_map<(d0, d1, d2, d3, d4) -> (d0, d3, d2)>,
+                     affine_map<(d0, d1, d2, d3, d4) -> (d0, d3, d4)>,
+                     affine_map<(d0, d1, d2, d3, d4) -> ()>,
+                     affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d3)>,
+                     affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d4)>]}
+                     ins(%query, %key, %value, %scale, %mask : tensor<1x4x4xf32>,
+        tensor<1x4x4xf32>, tensor<1x4x4xf32>, f32, tensor<1x4x4xi1>) outs(%init : tensor<1x4x4xf32>) {
+          ^bb0(%arg0: f32):
+          iree_linalg_ext.yield %arg0 : f32
+        } -> tensor<1x4x4xf32>
+  check.expect_almost_eq_const(
+      %1,
+      dense<[[[0.798884, 0.898884, 0.998884, 1.09888],
+              [0.941939, 1.04194, 1.14194, 1.24194],
+              [1.05371, 1.15371, 1.25371, 1.35371],
+              [1.13295, 1.23295, 1.33295, 1.43295]]]> : tensor<1x4x4xf32>
+  ) : tensor<1x4x4xf32>
+  return
+}
+
+func.func @attention1x4x4_i1_mask_tril() {
+  %init = tensor.empty() : tensor<1x4x4xf32>
+  %query = util.unfoldable_constant dense<[[[0.1, 0.2, 0.3, 0.4],
+                                            [0.5, 0.6, 0.7, 0.8],
+                                            [0.9, 1.0, 1.1, 1.2],
+                                            [1.3, 1.4, 1.5, 1.6]]]> : tensor<1x4x4xf32>
+
+  %key = util.unfoldable_constant dense<[[[0.1, 0.2, 0.3, 0.4],
+                                          [0.5, 0.6, 0.7, 0.8],
+                                          [0.9, 1.0, 1.1, 1.2],
+                                          [1.3, 1.4, 1.5, 1.6]]]> : tensor<1x4x4xf32>
+  %value = util.unfoldable_constant dense<[[[0.1, 0.2, 0.3, 0.4],
+                                            [0.5, 0.6, 0.7, 0.8],
+                                            [0.9, 1.0, 1.1, 1.2],
+                                            [1.3, 1.4, 1.5, 1.6]]]> : tensor<1x4x4xf32>
+
+  %i8mask = util.unfoldable_constant dense<[140, 239]> : tensor<2xi8>
+  %mask = flow.tensor.bitcast %i8mask : tensor<2xi8> -> tensor<1x4x4xi1>
+
+  %scale = arith.constant 0.5 : f32
+  %1 = iree_linalg_ext.attention {indexing_maps = [affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2)>,
+                     affine_map<(d0, d1, d2, d3, d4) -> (d0, d3, d2)>,
+                     affine_map<(d0, d1, d2, d3, d4) -> (d0, d3, d4)>,
+                     affine_map<(d0, d1, d2, d3, d4) -> ()>,
+                     affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d3)>,
+                     affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d4)>]}
+                     ins(%query, %key, %value, %scale, %mask : tensor<1x4x4xf32>,
+        tensor<1x4x4xf32>, tensor<1x4x4xf32>, f32, tensor<1x4x4xi1>) outs(%init : tensor<1x4x4xf32>) {
+          ^bb0(%arg0: f32):
+          iree_linalg_ext.yield %arg0 : f32
+        } -> tensor<1x4x4xf32>
+  check.expect_almost_eq_const(
+      %1,
+      dense<[[[1.11993, 1.21993, 1.31993, 1.41993],
+              [1.3, 1.4, 1.5, 1.6],
+              [1.05371, 1.15371, 1.25371, 1.35371],
+              [1.15549, 1.25549, 1.35549, 1.45549]]]> : tensor<1x4x4xf32>
+  ) : tensor<1x4x4xf32>
+  return
+}
diff --git a/tests/e2e/subbyte_types/subbyte_types.mlir b/tests/e2e/subbyte_types/subbyte_types.mlir
index e5d2e0fbcfc6..a1fa90bf4446 100644
--- a/tests/e2e/subbyte_types/subbyte_types.mlir
+++ b/tests/e2e/subbyte_types/subbyte_types.mlir
@@ -26,3 +26,74 @@ func.func @i1_type_slice() {
   check.expect_eq_const(%tensor_res, dense<[255]> : tensor<1xi8>) : tensor<1xi8>
   return
 }
+
+func.func @i1_representation() {
+  %mask = util.unfoldable_constant dense<[140]> : tensor<1xi8>
+  %casted = flow.tensor.bitcast %mask : tensor<1xi8> -> tensor<2x4xi1>
+  %bar = util.optimization_barrier %casted : tensor<2x4xi1>
+  %tensor_res = flow.tensor.bitcast %bar : tensor<2x4xi1> -> tensor<1xi8>
+  check.expect_eq_const(%tensor_res, dense<[140]> : tensor<1xi8>) : tensor<1xi8>
+  return
+}
+
+func.func @i1_representation_2() {
+  %mask = util.unfoldable_constant dense<[140, 77]> : tensor<2xi8>
+  %casted = flow.tensor.bitcast %mask : tensor<2xi8> -> tensor<2x8xi1>
+  %bar = util.optimization_barrier %casted : tensor<2x8xi1>
+  %tensor_res = flow.tensor.bitcast %bar : tensor<2x8xi1> -> tensor<2xi8>
+  check.expect_eq_const(%tensor_res, dense<[140, 77]> : tensor<2xi8>) : tensor<2xi8>
+  return
+}
+
+func.func @i1_representation_3() {
+  %mask = util.unfoldable_constant dense<[140, 77]> : tensor<2xi8>
+  %casted = flow.tensor.bitcast %mask : tensor<2xi8> -> tensor<4x4xi1>
+  %bar = util.optimization_barrier %casted : tensor<4x4xi1>
+  %tensor_res = flow.tensor.bitcast %bar : tensor<4x4xi1> -> tensor<2xi8>
+  check.expect_eq_const(%tensor_res, dense<[140, 77]> : tensor<2xi8>) : tensor<2xi8>
+  return
+}
+
+func.func @truncate_i1() {
+  %mask = util.unfoldable_constant dense<[1, 1, 0, 0,
+                                          0, 0, 1, 1]> : tensor<8xi8>
+  %nm = tensor.empty() : tensor<8xi1>
+  %truncm = linalg.generic
+    {indexing_maps = [
+      affine_map<(d0) -> (d0)>,
+      affine_map<(d0) -> (d0)>],
+      iterator_types = ["parallel"]}
+    ins(%mask: tensor<8xi8>)
+    outs(%nm: tensor<8xi1>) {
+      ^bb0(%in: i8, %out: i1):
+        %zero = arith.constant 0 : i8
+        %truncated = arith.cmpi "sgt", %in, %zero : i8
+        linalg.yield %truncated : i1
+  } -> tensor<8xi1>
+  %tensor_res = flow.tensor.bitcast %truncm : tensor<8xi1> -> tensor<1xi8>
+  check.expect_eq_const(%tensor_res, dense<[195]> : tensor<1xi8>) : tensor<1xi8>
+  return
+}
+
+func.func @truncate_i1_2() {
+  %mask = util.unfoldable_constant dense<[[0, 0, 1, 1],
+                                          [1, 1, 0, 0],
+                                          [1, 1, 0, 0],
+                                          [0, 0, 1, 1]]> : tensor<4x4xi8>
+  %nm = tensor.empty() : tensor<4x4xi1>
+  %truncm = linalg.generic
+    {indexing_maps = [
+      affine_map<(d0, d1) -> (d0, d1)>,
+      affine_map<(d0, d1) -> (d0, d1)>],
+      iterator_types = ["parallel", "parallel"]}
+    ins(%mask: tensor<4x4xi8>)
+    outs(%nm: tensor<4x4xi1>) {
+      ^bb0(%in: i8, %out: i1):
+        %zero = arith.constant 0 : i8
+        %truncated = arith.cmpi "sgt", %in, %zero : i8
+        linalg.yield %truncated : i1
+  } -> tensor<4x4xi1>
+  %tensor_res = flow.tensor.bitcast %truncm : tensor<4x4xi1> -> tensor<2xi8>
+  check.expect_eq_const(%tensor_res, dense<[60, 195]> : tensor<2xi8>) : tensor<2xi8>
+  return
+}