From d160d75dd14cdf71316f28fa616f0232140ac2b9 Mon Sep 17 00:00:00 2001 From: Alexey Sachkov Date: Mon, 10 Feb 2025 14:40:44 +0100 Subject: [PATCH 01/13] [SYCL][DOC] Describe process of prototyping KHR extensions (#16883) --- sycl/doc/developer/KHRExtensions.md | 48 +++++++++++++++++++++++++++++ sycl/doc/index.rst | 1 + 2 files changed, 49 insertions(+) create mode 100644 sycl/doc/developer/KHRExtensions.md diff --git a/sycl/doc/developer/KHRExtensions.md b/sycl/doc/developer/KHRExtensions.md new file mode 100644 index 0000000000000..7890eb6707b85 --- /dev/null +++ b/sycl/doc/developer/KHRExtensions.md @@ -0,0 +1,48 @@ +# Considerations for working on KHR extensions + +SYCL specification evolves through embedding extensions developed by various +vendors, including Khronos Group itself (`khr` extensions). + +In order for a KHR extension to be accepted, there must be CTS tests for it and +at least one implementation which passes them. + +Considering that KHR extensions are being developed in public, we can start +prototyping them as soon as corresponding PR for an extension is published at +KhronosGroup/SYCL-Docs. + +However, we shouldn't be exposing those extensions to end users until the +extension is finalised, ratified and published by Khronos - due to risk of an +extension changing during that process and lack of the officially published +version of it. + +So, we can have a PR but can't merge it. Keeping PRs open for a long time is a +bad practice, because they tend to get stale: there are merge conflicts, +potential functional issues due to the codebase changes, etc. + +In order for us to avoid stale PRs, all functionality which is a public +interface of an "in-progress" KHR extension, must be hidden under +`__DPCPP_ENABLE_UNFINISHED_KHR_EXTENSIONS` macro. 
That way we can merge a PR to +avoid constantly maintaining it in good shape, start automatically testing it +but at the same time avoid exposing incomplete and/or undocumented features to +end users just yet. + +"in-progress" KHR extension term used above is defined as: +- PR proposing a KHR extension has not been merged/cherry-picked to `sycl-2020` + branch of KhronosGroup/SYCL-Docs. + + That only happens after all formal processes on Khronos Group side are + completed so an extension can be considered good and stable to be released by + us. + + Note: merge of an extension proposal PR into `main` branch of + KhronosGroup/SYCL-Docs repo is **not** enough. +- Published (i.e. the above bullet is complete) KHR extension, which hasn't been + fully implemented by us. + +The macro is **not** intended to be used by end users and its purpose is to +simplify our development process by allowing us to merge implementation (full +or partial) of the aforementioned extensions earlier to simplify maintenance and +enable automated testing. + +Due to this reason, we are not providing a separate macro for each "in-progress" +KHR extension we may (partially) support, but just a single guard. diff --git a/sycl/doc/index.rst b/sycl/doc/index.rst index cc4961dd7f438..f5e5ce5574215 100644 --- a/sycl/doc/index.rst +++ b/sycl/doc/index.rst @@ -66,3 +66,4 @@ Developer Documentation developer/DockerBKMs developer/ABIPolicyGuide developer/ContributeToDPCPP + developer/KHRExtensions From eaff40cb57e89fc4a5c6b166f99b176bede668d0 Mon Sep 17 00:00:00 2001 From: aelovikov-intel Date: Mon, 10 Feb 2025 06:46:02 -0800 Subject: [PATCH 02/13] [NFCI][SYCL] Eliminate `sycl/builtins_utils_*.hpp` (#16931) Split the helpers they used to provide between the builtins implementation and `sycl/detail/type_traits/vec_marray_traits.hpp`. 
--- sycl/include/sycl/builtins_utils_scalar.hpp | 73 ----------------- sycl/include/sycl/builtins_utils_vec.hpp | 79 ------------------ .../include/sycl/detail/builtins/builtins.hpp | 81 ++++++++++++++++++- .../detail/type_traits/vec_marray_traits.hpp | 16 ++++ .../ext/oneapi/experimental/bfloat16_math.hpp | 1 - .../sycl/ext/oneapi/experimental/builtins.hpp | 1 - 6 files changed, 96 insertions(+), 155 deletions(-) delete mode 100644 sycl/include/sycl/builtins_utils_scalar.hpp delete mode 100644 sycl/include/sycl/builtins_utils_vec.hpp diff --git a/sycl/include/sycl/builtins_utils_scalar.hpp b/sycl/include/sycl/builtins_utils_scalar.hpp deleted file mode 100644 index 7bb9e7b2ec502..0000000000000 --- a/sycl/include/sycl/builtins_utils_scalar.hpp +++ /dev/null @@ -1,73 +0,0 @@ -//==--- builtins_utils_scalar.hpp - SYCL built-in function utilities -------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#pragma once - -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -namespace sycl { -inline namespace _V1 { - -namespace detail { -#ifdef __FAST_MATH__ -template -struct use_fast_math - : std::is_same>, float> {}; -#else -template struct use_fast_math : std::false_type {}; -#endif -template constexpr bool use_fast_math_v = use_fast_math::value; - -// Utility for converting a swizzle to a vector or preserve the type if it isn't -// a swizzle. -template struct simplify_if_swizzle { - using type = T; -}; - -template -using simplify_if_swizzle_t = typename simplify_if_swizzle::type; - -// Utility trait for getting the decoration of a multi_ptr. 
-template struct get_multi_ptr_decoration; -template -struct get_multi_ptr_decoration< - multi_ptr> { - static constexpr access::decorated value = DecorateAddress; -}; - -template -constexpr access::decorated get_multi_ptr_decoration_v = - get_multi_ptr_decoration::value; - -// Utility trait for checking if a multi_ptr has a "writable" address space, -// i.e. global, local, private or generic. -template struct has_writeable_addr_space : std::false_type {}; -template -struct has_writeable_addr_space> - : std::bool_constant {}; - -template -constexpr bool has_writeable_addr_space_v = has_writeable_addr_space::value; - -} // namespace detail -} // namespace _V1 -} // namespace sycl diff --git a/sycl/include/sycl/builtins_utils_vec.hpp b/sycl/include/sycl/builtins_utils_vec.hpp deleted file mode 100644 index 3fd2336008654..0000000000000 --- a/sycl/include/sycl/builtins_utils_vec.hpp +++ /dev/null @@ -1,79 +0,0 @@ -//==--- builtins_utils_vec.hpp - SYCL built-in function utilities for vec --==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#pragma once - -#include - -#include - -#include -#include -#include // for marray -#include // for vec - -namespace sycl { -inline namespace _V1 { -namespace detail { -// Utility for converting a swizzle to a vector or preserve the type if it isn't -// a swizzle. -template class OperationCurrentT, int... Indexes> -struct simplify_if_swizzle> { - using type = vec; -}; - -// Utility trait for changing the element type of a type T. If T is a scalar, -// the new type replaces T completely. 
-template struct change_elements { - using type = NewElemT; -}; -template -struct change_elements> { - using type = marray::type, N>; -}; -template -struct change_elements> { - using type = vec::type, N>; -}; -template class OperationCurrentT, - int... Indexes> -struct change_elements> { - // Converts to vec for simplicity. - using type = - vec::type, - sizeof...(Indexes)>; -}; - -template -using change_elements_t = typename change_elements::type; - -// Utility functions for converting to/from vec/marray. -template vec to_vec2(marray X, size_t Start) { - return {X[Start], X[Start + 1]}; -} -template vec to_vec(marray X) { - vec Vec; - for (size_t I = 0; I < N; I++) - Vec[I] = X[I]; - return Vec; -} -template marray to_marray(vec X) { - marray Marray; - for (size_t I = 0; I < N; I++) - Marray[I] = X[I]; - return Marray; -} - -} // namespace detail -} // namespace _V1 -} // namespace sycl diff --git a/sycl/include/sycl/detail/builtins/builtins.hpp b/sycl/include/sycl/detail/builtins/builtins.hpp index 1f291fc4f60ac..4ce211cb1657b 100644 --- a/sycl/include/sycl/detail/builtins/builtins.hpp +++ b/sycl/include/sycl/detail/builtins/builtins.hpp @@ -63,11 +63,73 @@ #pragma once -#include +#include +#include +#include +#include // for marray +#include // for vec namespace sycl { inline namespace _V1 { namespace detail { +#ifdef __FAST_MATH__ +template +struct use_fast_math + : std::is_same>, float> {}; +#else +template struct use_fast_math : std::false_type {}; +#endif +template constexpr bool use_fast_math_v = use_fast_math::value; + +// Utility trait for getting the decoration of a multi_ptr. +template struct get_multi_ptr_decoration; +template +struct get_multi_ptr_decoration< + multi_ptr> { + static constexpr access::decorated value = DecorateAddress; +}; + +template +constexpr access::decorated get_multi_ptr_decoration_v = + get_multi_ptr_decoration::value; + +// Utility trait for checking if a multi_ptr has a "writable" address space, +// i.e. 
global, local, private or generic. +template struct has_writeable_addr_space : std::false_type {}; +template +struct has_writeable_addr_space> + : std::bool_constant {}; + +template +constexpr bool has_writeable_addr_space_v = has_writeable_addr_space::value; + +// Utility trait for changing the element type of a type T. If T is a scalar, +// the new type replaces T completely. +template +struct change_elements { + using type = NewElemT; +}; +template +struct change_elements>> { + using type = + marray::type, + T::size()>; +}; +template +struct change_elements>> { + using type = + vec::type, + T::size()>; +}; + +template +using change_elements_t = typename change_elements::type; + template inline constexpr bool builtin_same_shape_v = ((... && is_scalar_arithmetic_v) || (... && is_marray_v) || @@ -80,6 +142,23 @@ inline constexpr bool builtin_same_or_swizzle_v = // Use builtin_same_shape_v to filter out types unrelated to builtins. builtin_same_shape_v && all_same_v...>; +// Utility functions for converting to/from vec/marray. +template vec to_vec2(marray X, size_t Start) { + return {X[Start], X[Start + 1]}; +} +template vec to_vec(marray X) { + vec Vec; + for (size_t I = 0; I < N; I++) + Vec[I] = X[I]; + return Vec; +} +template marray to_marray(vec X) { + marray Marray; + for (size_t I = 0; I < N; I++) + Marray[I] = X[I]; + return Marray; +} + namespace builtins { #ifdef __SYCL_DEVICE_ONLY__ template auto convert_arg(T &&x) { diff --git a/sycl/include/sycl/detail/type_traits/vec_marray_traits.hpp b/sycl/include/sycl/detail/type_traits/vec_marray_traits.hpp index 86e8764dc87a0..6ce39bf6a072a 100644 --- a/sycl/include/sycl/detail/type_traits/vec_marray_traits.hpp +++ b/sycl/include/sycl/detail/type_traits/vec_marray_traits.hpp @@ -24,6 +24,22 @@ template class OperationCurrentT, int... Indexes> class SwizzleOp; +// Utility for converting a swizzle to a vector or preserve the type if it isn't +// a swizzle. 
+template struct simplify_if_swizzle { + using type = T; +}; + +template class OperationCurrentT, int... Indexes> +struct simplify_if_swizzle> { + using type = vec; +}; + +template +using simplify_if_swizzle_t = typename simplify_if_swizzle::type; + // --------- is_* traits ------------------ // template struct is_vec : std::false_type {}; template struct is_vec> : std::true_type {}; diff --git a/sycl/include/sycl/ext/oneapi/experimental/bfloat16_math.hpp b/sycl/include/sycl/ext/oneapi/experimental/bfloat16_math.hpp index 11dec596b1f45..49ed23008607e 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/bfloat16_math.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/bfloat16_math.hpp @@ -9,7 +9,6 @@ #pragma once #include // for ceil, cos, exp, exp10, exp2 -#include // For simplify_if_swizzle, is_swizzle #include // sycl::detail::memcpy #include #include // for bfloat16, bfloat16ToBits diff --git a/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp b/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp index d42df1fee26c8..e6fdba4cdad45 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp @@ -10,7 +10,6 @@ #include // for half #include // for to_vec2 -#include // for to_vec, to_marray... #include // for __SYCL_ALWAYS_INLINE #include // for is_svgenfloath, is_sv... #include // detail::memcpy From 8b83ba8d183eb9e73e54b614d128cbede0c093b9 Mon Sep 17 00:00:00 2001 From: aelovikov-intel Date: Mon, 10 Feb 2025 06:48:06 -0800 Subject: [PATCH 03/13] [NFC][SYCL] Minor refactoring in `sycl::vec` (#16934) We're planning big refactoring under preview breaking changes mode to implement latest changes to the SYCL specification (not yet merged but already reviewed with Khronos). This is one of several patches to split the changes into NFC refactoring and localized functional changes under preview guard. 
--- .../sycl/detail/named_swizzles_mixin.hpp | 9 ++- sycl/include/sycl/detail/vector_arith.hpp | 13 ++++ sycl/include/sycl/vector.hpp | 55 +++++++-------- .../vector/vector_convert_bfloat.cpp | 68 +++++++++---------- 4 files changed, 80 insertions(+), 65 deletions(-) diff --git a/sycl/include/sycl/detail/named_swizzles_mixin.hpp b/sycl/include/sycl/detail/named_swizzles_mixin.hpp index f35656b2e6381..2700a0adf49cf 100644 --- a/sycl/include/sycl/detail/named_swizzles_mixin.hpp +++ b/sycl/include/sycl/detail/named_swizzles_mixin.hpp @@ -18,6 +18,9 @@ namespace sycl { inline namespace _V1 { namespace detail { +// Will be defined in another header. +template struct from_incomplete; + #ifndef SYCL_SIMPLE_SWIZZLES #define __SYCL_SWIZZLE_MIXIN_SIMPLE_SWIZZLES #else @@ -785,7 +788,8 @@ namespace detail { return (*static_cast(this))[INDEX]; \ } -template struct NamedSwizzlesMixinConst { +template ::size()> +struct NamedSwizzlesMixinConst { #define __SYCL_SWIZZLE_MIXIN_METHOD(COND, NAME, ...) \ __SYCL_SWIZZLE_MIXIN_METHOD_CONST(COND, NAME, __VA_ARGS__) @@ -798,7 +802,8 @@ template struct NamedSwizzlesMixinConst { #undef __SYCL_SWIZZLE_MIXIN_METHOD }; -template struct NamedSwizzlesMixinBoth { +template ::size()> +struct NamedSwizzlesMixinBoth { #define __SYCL_SWIZZLE_MIXIN_METHOD(COND, NAME, ...) 
\ __SYCL_SWIZZLE_MIXIN_METHOD_NON_CONST(COND, NAME, __VA_ARGS__) \ __SYCL_SWIZZLE_MIXIN_METHOD_CONST(COND, NAME, __VA_ARGS__) diff --git a/sycl/include/sycl/detail/vector_arith.hpp b/sycl/include/sycl/detail/vector_arith.hpp index ac80bd916591d..e628ebb1ae260 100644 --- a/sycl/include/sycl/detail/vector_arith.hpp +++ b/sycl/include/sycl/detail/vector_arith.hpp @@ -21,6 +21,19 @@ template class __SYCL_EBO vec; namespace detail { +template struct from_incomplete; +template +struct from_incomplete : public from_incomplete {}; + +template +struct from_incomplete> { + using element_type = DataT; + static constexpr size_t size() { return NumElements; } +}; + +template struct ApplyIf {}; +template struct ApplyIf : Mixin {}; + // We use std::plus and similar to "map" template parameter to an // overloaded operator. These three below are missing from ``. struct ShiftLeft { diff --git a/sycl/include/sycl/vector.hpp b/sycl/include/sycl/vector.hpp index bb50fcdb5d754..67d00bd9ea7de 100644 --- a/sycl/include/sycl/vector.hpp +++ b/sycl/include/sycl/vector.hpp @@ -113,12 +113,11 @@ template class GetOp { // // must go throw `v.x()` returning a swizzle, then its `operator==` returning // vec and we want that code to compile. 
-template -struct ScalarConversionOperatorMixIn {}; +template class ScalarConversionOperatorMixIn { + using T = typename from_incomplete::element_type; -template -struct ScalarConversionOperatorMixIn> { - operator T() const { return (*static_cast(this))[0]; } +public: + operator T() const { return (*static_cast(this))[0]; } }; template @@ -134,10 +133,10 @@ inline constexpr bool is_fundamental_or_half_or_bfloat16 = template class __SYCL_EBO vec : public detail::vec_arith, - public detail::ScalarConversionOperatorMixIn, - DataT, NumElements>, - public detail::NamedSwizzlesMixinBoth, - NumElements> { + public detail::ApplyIf< + NumElements == 1, + detail::ScalarConversionOperatorMixIn>>, + public detail::NamedSwizzlesMixinBoth> { static_assert(std::is_same_v>, "DataT must be cv-unqualified"); @@ -177,6 +176,24 @@ class __SYCL_EBO vec element_type_for_vector_t __attribute__(( ext_vector_type(NumElements)))>; + // Make it a template to avoid ambiguity with `vec(const DataT &)` when + // `vector_t` is the same as `DataT`. Not that the other ctor isn't a template + // so we don't even need a smart `enable_if` condition here, the mere fact of + // this being a template makes the other ctor preferred. + template < + typename vector_t_ = vector_t, + typename = typename std::enable_if_t>> + constexpr vec(vector_t_ openclVector) { + m_Data = sycl::bit_cast(openclVector); + } + + /* @SYCL2020 + * Available only when: compiled for the device. + * Converts this SYCL vec instance to the underlying backend-native vector + * type defined by vector_t. + */ + operator vector_t() const { return sycl::bit_cast(m_Data); } + private: #endif // __SYCL_DEVICE_ONLY__ @@ -299,26 +316,6 @@ class __SYCL_EBO vec return *this; } -#ifdef __SYCL_DEVICE_ONLY__ - // Make it a template to avoid ambiguity with `vec(const DataT &)` when - // `vector_t` is the same as `DataT`. 
Not that the other ctor isn't a template - // so we don't even need a smart `enable_if` condition here, the mere fact of - // this being a template makes the other ctor preferred. - template < - typename vector_t_ = vector_t, - typename = typename std::enable_if_t>> - constexpr vec(vector_t_ openclVector) { - m_Data = sycl::bit_cast(openclVector); - } - - /* @SYCL2020 - * Available only when: compiled for the device. - * Converts this SYCL vec instance to the underlying backend-native vector - * type defined by vector_t. - */ - operator vector_t() const { return sycl::bit_cast(m_Data); } -#endif // __SYCL_DEVICE_ONLY__ - __SYCL2020_DEPRECATED("get_count() is deprecated, please use size() instead") static constexpr size_t get_count() { return size(); } static constexpr size_t size() noexcept { return NumElements; } diff --git a/sycl/test/check_device_code/vector/vector_convert_bfloat.cpp b/sycl/test/check_device_code/vector/vector_convert_bfloat.cpp index 6e5562b182f1b..11fe56b0b54c3 100644 --- a/sycl/test/check_device_code/vector/vector_convert_bfloat.cpp +++ b/sycl/test/check_device_code/vector/vector_convert_bfloat.cpp @@ -18,19 +18,19 @@ using bfloat16 = sycl::ext::oneapi::bfloat16; // CHECK-NEXT: [[VEC_ADDR_I_I_I_I:%.*]] = alloca <3 x i16>, align 8 // CHECK-NEXT: [[DST_I_I_I_I:%.*]] = alloca [4 x float], align 4 // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) -// CHECK-NEXT: [[LOADVEC4_I_I:%.*]] = load <4 x i16>, ptr addrspace(4) [[INP]], align 8, !noalias [[META8]] +// CHECK-NEXT: [[LOADVECN_I_I:%.*]] = load <4 x i16>, ptr addrspace(4) [[INP]], align 8, !noalias [[META8]] // CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[VEC_ADDR_I_I_I_I]]), !noalias [[META8]] // CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr nonnull [[DST_I_I_I_I]]), !noalias [[META8]] // CHECK-NEXT: [[VEC_ADDR_ASCAST_I_I_I_I:%.*]] = addrspacecast ptr [[VEC_ADDR_I_I_I_I]] to ptr addrspace(4) // CHECK-NEXT: 
[[DST_ASCAST_I_I_I_I:%.*]] = addrspacecast ptr [[DST_I_I_I_I]] to ptr addrspace(4) -// CHECK-NEXT: [[EXTRACTVEC_I_I_I_I:%.*]] = shufflevector <4 x i16> [[LOADVEC4_I_I]], <4 x i16> poison, <4 x i32> +// CHECK-NEXT: [[EXTRACTVEC_I_I_I_I:%.*]] = shufflevector <4 x i16> [[LOADVECN_I_I]], <4 x i16> poison, <4 x i32> // CHECK-NEXT: store <4 x i16> [[EXTRACTVEC_I_I_I_I]], ptr [[VEC_ADDR_I_I_I_I]], align 8, !tbaa [[TBAA11:![0-9]+]], !noalias [[META8]] // CHECK-NEXT: call spir_func void @__devicelib_ConvertBF16ToFINTELVec3(ptr addrspace(4) noundef [[VEC_ADDR_ASCAST_I_I_I_I]], ptr addrspace(4) noundef [[DST_ASCAST_I_I_I_I]]) #[[ATTR4:[0-9]+]], !noalias [[META8]] -// CHECK-NEXT: [[LOADVEC4_I_I_I_I_I:%.*]] = load <4 x float>, ptr [[DST_I_I_I_I]], align 4, !noalias [[META8]] +// CHECK-NEXT: [[LOADVECN_I_I_I_I_I:%.*]] = load <4 x float>, ptr [[DST_I_I_I_I]], align 4, !noalias [[META8]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VEC_ADDR_I_I_I_I]]), !noalias [[META8]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[DST_I_I_I_I]]), !noalias [[META8]] -// CHECK-NEXT: [[EXTRACTVEC4_I:%.*]] = shufflevector <4 x float> [[LOADVEC4_I_I_I_I_I]], <4 x float> poison, <4 x i32> -// CHECK-NEXT: store <4 x float> [[EXTRACTVEC4_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, !alias.scope [[META8]] +// CHECK-NEXT: [[EXTRACTVEC_I:%.*]] = shufflevector <4 x float> [[LOADVECN_I_I_I_I_I]], <4 x float> poison, <4 x i32> +// CHECK-NEXT: store <4 x float> [[EXTRACTVEC_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, !alias.scope [[META8]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestBFtoFDeviceRNE(vec &inp) { @@ -43,19 +43,19 @@ SYCL_EXTERNAL auto TestBFtoFDeviceRNE(vec &inp) { // CHECK-NEXT: [[VEC_ADDR_I_I_I_I:%.*]] = alloca <3 x i16>, align 8 // CHECK-NEXT: [[DST_I_I_I_I:%.*]] = alloca [4 x float], align 4 // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META15:![0-9]+]]) -// CHECK-NEXT: [[LOADVEC4_I_I:%.*]] = load <4 x 
i16>, ptr addrspace(4) [[INP]], align 8, !noalias [[META15]] +// CHECK-NEXT: [[LOADVECN_I_I:%.*]] = load <4 x i16>, ptr addrspace(4) [[INP]], align 8, !noalias [[META15]] // CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[VEC_ADDR_I_I_I_I]]), !noalias [[META15]] // CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr nonnull [[DST_I_I_I_I]]), !noalias [[META15]] // CHECK-NEXT: [[VEC_ADDR_ASCAST_I_I_I_I:%.*]] = addrspacecast ptr [[VEC_ADDR_I_I_I_I]] to ptr addrspace(4) // CHECK-NEXT: [[DST_ASCAST_I_I_I_I:%.*]] = addrspacecast ptr [[DST_I_I_I_I]] to ptr addrspace(4) -// CHECK-NEXT: [[EXTRACTVEC_I_I_I_I:%.*]] = shufflevector <4 x i16> [[LOADVEC4_I_I]], <4 x i16> poison, <4 x i32> +// CHECK-NEXT: [[EXTRACTVEC_I_I_I_I:%.*]] = shufflevector <4 x i16> [[LOADVECN_I_I]], <4 x i16> poison, <4 x i32> // CHECK-NEXT: store <4 x i16> [[EXTRACTVEC_I_I_I_I]], ptr [[VEC_ADDR_I_I_I_I]], align 8, !tbaa [[TBAA11]], !noalias [[META15]] // CHECK-NEXT: call spir_func void @__devicelib_ConvertBF16ToFINTELVec3(ptr addrspace(4) noundef [[VEC_ADDR_ASCAST_I_I_I_I]], ptr addrspace(4) noundef [[DST_ASCAST_I_I_I_I]]) #[[ATTR4]], !noalias [[META15]] -// CHECK-NEXT: [[LOADVEC4_I_I_I_I_I:%.*]] = load <4 x float>, ptr [[DST_I_I_I_I]], align 4, !noalias [[META15]] +// CHECK-NEXT: [[LOADVECN_I_I_I_I_I:%.*]] = load <4 x float>, ptr [[DST_I_I_I_I]], align 4, !noalias [[META15]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VEC_ADDR_I_I_I_I]]), !noalias [[META15]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[DST_I_I_I_I]]), !noalias [[META15]] -// CHECK-NEXT: [[EXTRACTVEC4_I:%.*]] = shufflevector <4 x float> [[LOADVEC4_I_I_I_I_I]], <4 x float> poison, <4 x i32> -// CHECK-NEXT: store <4 x float> [[EXTRACTVEC4_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, !alias.scope [[META15]] +// CHECK-NEXT: [[EXTRACTVEC_I:%.*]] = shufflevector <4 x float> [[LOADVECN_I_I_I_I_I]], <4 x float> poison, <4 x i32> +// CHECK-NEXT: store <4 x float> 
[[EXTRACTVEC_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, !alias.scope [[META15]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestBFtoFDeviceRZ(vec &inp) { @@ -66,23 +66,23 @@ SYCL_EXTERNAL auto TestBFtoFDeviceRZ(vec &inp) { // CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.6") align 16 [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 8 dereferenceable(8) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META18:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) -// CHECK-NEXT: [[LOADVEC4_I_I:%.*]] = load <4 x i16>, ptr addrspace(4) [[INP]], align 8, !noalias [[META19]] -// CHECK-NEXT: [[EXTRACTVEC_I_I:%.*]] = shufflevector <4 x i16> [[LOADVEC4_I_I]], <4 x i16> poison, <3 x i32> +// CHECK-NEXT: [[LOADVECN_I_I:%.*]] = load <4 x i16>, ptr addrspace(4) [[INP]], align 8, !noalias [[META19]] +// CHECK-NEXT: [[EXTRACTVEC_I_I:%.*]] = shufflevector <4 x i16> [[LOADVECN_I_I]], <4 x i16> poison, <3 x i32> // CHECK-NEXT: br label [[FOR_COND_I_I_I:%.*]] // CHECK: for.cond.i.i.i: -// CHECK-NEXT: [[RETVAL1_SROA_0_0_I_I_I:%.*]] = phi <3 x i32> [ undef, [[ENTRY:%.*]] ], [ [[RETVAL1_SROA_0_0_VECBLEND_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] +// CHECK-NEXT: [[RETVAL1_SROA_0_0_I_I_I:%.*]] = phi <3 x i32> [ undef, [[ENTRY:%.*]] ], [ [[VECINS_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] // CHECK-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I]] ] // CHECK-NEXT: [[CMP_I_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I_I]], 3 // CHECK-NEXT: br i1 [[CMP_I_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZNK4SYCL3_V13VECINS0_3EXT6ONEAPI8BFLOAT16ELI3EE7CONVERTIILNS_13ROUNDING_MODEE2EEENS1_IT_LI3EEEV_EXIT:%.*]] // CHECK: for.body.i.i.i: // CHECK-NEXT: [[VECEXT_I_I_I:%.*]] = extractelement <3 x i16> [[EXTRACTVEC_I_I]], i32 [[I_0_I_I_I]] // CHECK-NEXT: [[CALL_I_I_I_I:%.*]] = 
tail call spir_func noundef i32 @__imf_bfloat162int_rz(i16 noundef zeroext [[VECEXT_I_I_I]]) #[[ATTR4]], !noalias [[META19]] -// CHECK-NEXT: [[VECINS_I_I_I:%.*]] = insertelement <3 x i32> [[RETVAL1_SROA_0_0_I_I_I]], i32 [[CALL_I_I_I_I]], i32 [[I_0_I_I_I]] +// CHECK-NEXT: [[VECINS_I_I_I]] = insertelement <3 x i32> [[RETVAL1_SROA_0_0_I_I_I]], i32 [[CALL_I_I_I_I]], i32 [[I_0_I_I_I]] // CHECK-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 // CHECK-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK: _ZNK4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi3EE7convertIiLNS_13rounding_modeE2EEENS1_IT_Li3EEEv.exit: -// CHECK-NEXT: [[EXTRACTVEC4_I:%.*]] = shufflevector <3 x i32> [[RETVAL1_SROA_0_0_I_I_I]], <3 x i32> poison, <4 x i32> -// CHECK-NEXT: store <4 x i32> [[EXTRACTVEC4_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, !alias.scope [[META19]] +// CHECK-NEXT: [[EXTRACTVEC_I:%.*]] = shufflevector <3 x i32> [[RETVAL1_SROA_0_0_I_I_I]], <3 x i32> poison, <4 x i32> +// CHECK-NEXT: store <4 x i32> [[EXTRACTVEC_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, !alias.scope [[META19]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestBFtointDeviceRZ(vec &inp) { @@ -108,19 +108,19 @@ SYCL_EXTERNAL auto TestBFtointDeviceRNE(vec &inp) { // CHECK-NEXT: [[VEC_ADDR_I_I_I_I:%.*]] = alloca <3 x float>, align 16 // CHECK-NEXT: [[DST_I_I_I_I:%.*]] = alloca [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], align 2 // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META29:![0-9]+]]) -// CHECK-NEXT: [[LOADVEC4_I_I:%.*]] = load <4 x float>, ptr addrspace(4) [[INP]], align 16, !noalias [[META29]] +// CHECK-NEXT: [[LOADVECN_I_I:%.*]] = load <4 x float>, ptr addrspace(4) [[INP]], align 16, !noalias [[META29]] // CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr nonnull [[VEC_ADDR_I_I_I_I]]), !noalias [[META29]] // CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[DST_I_I_I_I]]), !noalias [[META29]] // CHECK-NEXT: 
[[VEC_ADDR_ASCAST_I_I_I_I:%.*]] = addrspacecast ptr [[VEC_ADDR_I_I_I_I]] to ptr addrspace(4) // CHECK-NEXT: [[DST_ASCAST_I_I_I_I:%.*]] = addrspacecast ptr [[DST_I_I_I_I]] to ptr addrspace(4) -// CHECK-NEXT: [[EXTRACTVEC_I_I_I_I:%.*]] = shufflevector <4 x float> [[LOADVEC4_I_I]], <4 x float> poison, <4 x i32> +// CHECK-NEXT: [[EXTRACTVEC_I_I_I_I:%.*]] = shufflevector <4 x float> [[LOADVECN_I_I]], <4 x float> poison, <4 x i32> // CHECK-NEXT: store <4 x float> [[EXTRACTVEC_I_I_I_I]], ptr [[VEC_ADDR_I_I_I_I]], align 16, !tbaa [[TBAA11]], !noalias [[META29]] // CHECK-NEXT: call spir_func void @__devicelib_ConvertFToBF16INTELVec3(ptr addrspace(4) noundef [[VEC_ADDR_ASCAST_I_I_I_I]], ptr addrspace(4) noundef [[DST_ASCAST_I_I_I_I]]) #[[ATTR4]], !noalias [[META29]] -// CHECK-NEXT: [[LOADVEC4_I_I_I_I_I:%.*]] = load <4 x i16>, ptr [[DST_I_I_I_I]], align 2, !noalias [[META29]] +// CHECK-NEXT: [[LOADVECN_I_I_I_I_I:%.*]] = load <4 x i16>, ptr [[DST_I_I_I_I]], align 2, !noalias [[META29]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[VEC_ADDR_I_I_I_I]]), !noalias [[META29]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[DST_I_I_I_I]]), !noalias [[META29]] -// CHECK-NEXT: [[EXTRACTVEC4_I:%.*]] = shufflevector <4 x i16> [[LOADVEC4_I_I_I_I_I]], <4 x i16> poison, <4 x i32> -// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC4_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, !alias.scope [[META29]] +// CHECK-NEXT: [[EXTRACTVEC_I:%.*]] = shufflevector <4 x i16> [[LOADVECN_I_I_I_I_I]], <4 x i16> poison, <4 x i32> +// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, !alias.scope [[META29]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestFtoBFDeviceRNE(vec &inp) { @@ -131,23 +131,23 @@ SYCL_EXTERNAL auto TestFtoBFDeviceRNE(vec &inp) { // CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.0") align 8 [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef 
readonly align 16 dereferenceable(16) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META32:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META33:![0-9]+]]) -// CHECK-NEXT: [[LOADVEC4_I_I:%.*]] = load <4 x float>, ptr addrspace(4) [[INP]], align 16, !noalias [[META33]] -// CHECK-NEXT: [[EXTRACTVEC_I_I:%.*]] = shufflevector <4 x float> [[LOADVEC4_I_I]], <4 x float> poison, <3 x i32> +// CHECK-NEXT: [[LOADVECN_I_I:%.*]] = load <4 x float>, ptr addrspace(4) [[INP]], align 16, !noalias [[META33]] +// CHECK-NEXT: [[EXTRACTVEC_I_I:%.*]] = shufflevector <4 x float> [[LOADVECN_I_I]], <4 x float> poison, <3 x i32> // CHECK-NEXT: br label [[FOR_COND_I_I_I:%.*]] // CHECK: for.cond.i.i.i: -// CHECK-NEXT: [[RETVAL1_SROA_0_0_I_I_I:%.*]] = phi <3 x i16> [ undef, [[ENTRY:%.*]] ], [ [[RETVAL1_SROA_0_0_VECBLEND_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] +// CHECK-NEXT: [[RETVAL1_SROA_0_0_I_I_I:%.*]] = phi <3 x i16> [ undef, [[ENTRY:%.*]] ], [ [[VECINS_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] // CHECK-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I]] ] // CHECK-NEXT: [[CMP_I_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I_I]], 3 // CHECK-NEXT: br i1 [[CMP_I_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZNK4SYCL3_V13VECIFLI3EE7CONVERTINS0_3EXT6ONEAPI8BFLOAT16ELNS_13ROUNDING_MODEE2EEENS1_IT_LI3EEEV_EXIT:%.*]] // CHECK: for.body.i.i.i: // CHECK-NEXT: [[VECEXT_I_I_I:%.*]] = extractelement <3 x float> [[EXTRACTVEC_I_I]], i32 [[I_0_I_I_I]] // CHECK-NEXT: [[CALL_I_I_I_I:%.*]] = tail call spir_func noundef zeroext i16 @__imf_float2bfloat16_rz(float noundef [[VECEXT_I_I_I]]) #[[ATTR4]], !noalias [[META33]] -// CHECK-NEXT: [[VECINS_I_I_I:%.*]] = insertelement <3 x i16> [[RETVAL1_SROA_0_0_I_I_I]], i16 [[CALL_I_I_I_I]], i32 [[I_0_I_I_I]] +// CHECK-NEXT: [[VECINS_I_I_I]] = insertelement <3 x i16> [[RETVAL1_SROA_0_0_I_I_I]], i16 [[CALL_I_I_I_I]], i32 [[I_0_I_I_I]] // 
CHECK-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 // CHECK-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK: _ZNK4sycl3_V13vecIfLi3EE7convertINS0_3ext6oneapi8bfloat16ELNS_13rounding_modeE2EEENS1_IT_Li3EEEv.exit: -// CHECK-NEXT: [[EXTRACTVEC4_I:%.*]] = shufflevector <3 x i16> [[RETVAL1_SROA_0_0_I_I_I]], <3 x i16> poison, <4 x i32> -// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC4_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, !alias.scope [[META33]] +// CHECK-NEXT: [[EXTRACTVEC_I:%.*]] = shufflevector <3 x i16> [[RETVAL1_SROA_0_0_I_I_I]], <3 x i16> poison, <4 x i32> +// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, !alias.scope [[META33]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestFtoBFDeviceRZ(vec &inp) { @@ -158,23 +158,23 @@ SYCL_EXTERNAL auto TestFtoBFDeviceRZ(vec &inp) { // CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.0") align 8 [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 16 dereferenceable(16) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META37:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META38:![0-9]+]]) -// CHECK-NEXT: [[LOADVEC4_I_I:%.*]] = load <4 x i32>, ptr addrspace(4) [[INP]], align 16, !noalias [[META38]] -// CHECK-NEXT: [[EXTRACTVEC_I_I:%.*]] = shufflevector <4 x i32> [[LOADVEC4_I_I]], <4 x i32> poison, <3 x i32> +// CHECK-NEXT: [[LOADVECN_I_I:%.*]] = load <4 x i32>, ptr addrspace(4) [[INP]], align 16, !noalias [[META38]] +// CHECK-NEXT: [[EXTRACTVEC_I_I:%.*]] = shufflevector <4 x i32> [[LOADVECN_I_I]], <4 x i32> poison, <3 x i32> // CHECK-NEXT: br label [[FOR_COND_I_I_I:%.*]] // CHECK: for.cond.i.i.i: -// CHECK-NEXT: [[RETVAL1_SROA_0_0_I_I_I:%.*]] = phi <3 x i16> [ undef, [[ENTRY:%.*]] ], [ [[RETVAL1_SROA_0_0_VECBLEND_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] +// CHECK-NEXT: 
[[RETVAL1_SROA_0_0_I_I_I:%.*]] = phi <3 x i16> [ undef, [[ENTRY:%.*]] ], [ [[VECINS_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] // CHECK-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I]] ] // CHECK-NEXT: [[CMP_I_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I_I]], 3 // CHECK-NEXT: br i1 [[CMP_I_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZNK4SYCL3_V13VECIILI3EE7CONVERTINS0_3EXT6ONEAPI8BFLOAT16ELNS_13ROUNDING_MODEE2EEENS1_IT_LI3EEEV_EXIT:%.*]] // CHECK: for.body.i.i.i: // CHECK-NEXT: [[VECEXT_I_I_I:%.*]] = extractelement <3 x i32> [[EXTRACTVEC_I_I]], i32 [[I_0_I_I_I]] // CHECK-NEXT: [[CALL_I_I_I_I:%.*]] = tail call spir_func noundef zeroext i16 @__imf_int2bfloat16_rz(i32 noundef [[VECEXT_I_I_I]]) #[[ATTR4]], !noalias [[META38]] -// CHECK-NEXT: [[VECINS_I_I_I:%.*]] = insertelement <3 x i16> [[RETVAL1_SROA_0_0_I_I_I]], i16 [[CALL_I_I_I_I]], i32 [[I_0_I_I_I]] +// CHECK-NEXT: [[VECINS_I_I_I]] = insertelement <3 x i16> [[RETVAL1_SROA_0_0_I_I_I]], i16 [[CALL_I_I_I_I]], i32 [[I_0_I_I_I]] // CHECK-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 // CHECK-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP41:![0-9]+]] // CHECK: _ZNK4sycl3_V13vecIiLi3EE7convertINS0_3ext6oneapi8bfloat16ELNS_13rounding_modeE2EEENS1_IT_Li3EEEv.exit: -// CHECK-NEXT: [[EXTRACTVEC4_I:%.*]] = shufflevector <3 x i16> [[RETVAL1_SROA_0_0_I_I_I]], <3 x i16> poison, <4 x i32> -// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC4_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, !alias.scope [[META38]] +// CHECK-NEXT: [[EXTRACTVEC_I:%.*]] = shufflevector <3 x i16> [[RETVAL1_SROA_0_0_I_I_I]], <3 x i16> poison, <4 x i32> +// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, !alias.scope [[META38]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestInttoBFDeviceRZ(vec &inp) { @@ -195,7 +195,7 @@ SYCL_EXTERNAL auto TestLLtoBFDeviceRTP(vec &inp) { } // CHECK-LABEL: define dso_local spir_func void @_Z22TestShorttoBFDeviceRTNRN4sycl3_V13vecIsLi2EEE( 
-// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.30") align 4 [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 4 dereferenceable(4) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META48:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.32") align 4 [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 4 dereferenceable(4) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META48:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META49:![0-9]+]]) // CHECK-NEXT: [[TMP0:%.*]] = load <2 x i16>, ptr addrspace(4) [[INP]], align 4, !tbaa [[TBAA11]], !noalias [[META49]] From fcc330e267c9c375f9961284127db031cc145e15 Mon Sep 17 00:00:00 2001 From: Artur Gainullin Date: Mon, 10 Feb 2025 07:30:02 -0800 Subject: [PATCH 04/13] [SYCL] Allow alignment property to be used for group load/store (#16882) It makes possible to provide alignment property to the load/store operations indicating the known alignment of the pointer. It will allow to avoid expensive dynamic alignment checks. 
--- .../sycl_ext_oneapi_group_load_store.asciidoc | 62 +- .../oneapi/experimental/group_load_store.hpp | 107 +- sycl/test/check_device_code/group_load.cpp | 1094 ++++++------ .../group_load_store_alignment.cpp | 105 ++ .../group_load_store_native_key.cpp | 28 +- sycl/test/check_device_code/group_store.cpp | 1584 +++++++++-------- .../group_load_store_alignment_negative.cpp | 20 + 7 files changed, 1622 insertions(+), 1378 deletions(-) create mode 100644 sycl/test/check_device_code/group_load_store_alignment.cpp create mode 100644 sycl/test/group_load_store_alignment_negative.cpp diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_group_load_store.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_group_load_store.asciidoc index 4aa58a7df3eac..5d4a19a064ffc 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_group_load_store.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_group_load_store.asciidoc @@ -101,11 +101,13 @@ in the group. and default constructible. * `Properties` is an instance of `sycl::ext::oneapi::experimental::properties` +_Mandates_: If `Properties` contains the `alignment` property, `InputIteratorT` must be a pointer. + _Effects_: Loads single element from `in_iter` to `out` by using the `g` group object to identify memory location as `in_iter` + `g.get_local_linear_id()`. -Properties may provide xref:optimization_properties[assertions] which can -enable better optimizations. +Properties may provide xref:optimization_properties[assertions] or the `alignment` property +which can enable better optimizations. ==== `sycl::vec` Overload @@ -132,6 +134,8 @@ in the group. and default constructible. * `Properties` is an instance of `sycl::ext::oneapi::experimental::properties` +_Mandates_: If `Properties` contains the `alignment` property, `InputIteratorT` must be a pointer. + _Effects_: Loads `N` elements from `in_iter` to `out` using the `g` group object. 
Properties may specify xref:data_placement[data placement]. @@ -140,8 +144,9 @@ Default data placement is a blocked one: in striped case: `out[i]` = `in_iter[g.get_local_linear_id() + g.get_local_linear_range() * i];` for `i` between `0` and `N`. -Properties may also provide xref:optimization_properties[assertions] which can -enable better optimizations. +Properties may also provide xref:optimization_properties[assertions] or the `alignment` property +which can enable better optimizations. + ==== Fixed-size Array Overload @@ -169,6 +174,8 @@ work-group or sub-group. and default constructible. * `Properties` is an instance of `sycl::ext::oneapi::experimental::properties` +_Mandates_: If `Properties` contains the `alignment` property, `InputIteratorT` must be a pointer. + _Effects_: Loads `ElementsPerWorkItem` elements from `in_iter` to `out` using the `g` group object. Properties may specify xref:data_placement[data placement]. @@ -177,8 +184,9 @@ Default placement is a blocked one: in striped case: `out[i]` = `in_iter[g.get_local_linear_id() + g.get_local_linear_range() * i];` for `i` between `0` and `ElementsPerWorkItem`. -Properties may also provide xref:optimization_properties[assertions] which can -enable better optimizations. +Properties may also provide xref:optimization_properties[assertions] or the `alignment` property +which can enable better optimizations. + === Store API @@ -209,11 +217,13 @@ in the group. and default constructible. * `Properties` is an instance of `sycl::ext::oneapi::experimental::properties` +_Mandates_: If `Properties` contains the `alignment` property, `OutputIteratorT` must be a pointer. + _Effects_: Stores single element `in` to `out_iter` by using the `g` group object to identify memory location as `out_iter` + `g.get_local_linear_id()` -Properties may provide xref:optimization_properties[assertions] which can -enable better optimizations. 
+Properties may provide xref:optimization_properties[assertions] or the `alignment` property +which can enable better optimizations. ==== `sycl::vec` Overload @@ -241,6 +251,8 @@ in the group. and default constructible. * `Properties` is an instance of `sycl::ext::oneapi::experimental::properties` +_Mandates_: If `Properties` contains the `alignment` property, `OutputIteratorT` must be a pointer. + _Effects_: Stores `N` elements from `in` vec to `out_iter` using the `g` group object. Properties may specify xref:data_placement[data placement]. @@ -249,8 +261,8 @@ Default placement is a blocked one: in striped case: `out_iter[g.get_local_linear_id() + g.get_local_linear_range() * i]` = `in[i];` for `i` between `0` and `N`. -Properties may also provide xref:optimization_properties[assertions] which can -enable better optimizations. +Properties may also provide xref:optimization_properties[assertions] or the `alignment` property +which can enable better optimizations. ==== Fixed-size Array Overload @@ -280,6 +292,8 @@ work-group or sub-group. and default constructible. * `Properties` is an instance of `sycl::ext::oneapi::experimental::properties` +_Mandates_: If `Properties` contains the `alignment` property, `OutputIteratorT` must be a pointer. + _Effects_: Stores `ElementsPerWorkItem` elements from `in` span to `out_iter` using the `g` group object. @@ -289,8 +303,9 @@ Default placement is a blocked one: in striped case: `out_iter[g.get_local_linear_id() + g.get_local_linear_range() * i]` = `in[i];` for `i` between `0` and `ItemsPerWorkItem`. -Properties may also provide xref:optimization_properties[assertions] which can -enable better optimizations. +Properties may also provide xref:optimization_properties[assertions] or the `alignment` property +which can enable better optimizations. + === Data Placement @@ -442,6 +457,23 @@ so the implementation can rely on `get_max_local_range()` range size: If partition is uneven the behavior is undefined. 
+== Alignment + +If `InputIteratorT`/`OutputIteratorT` is a pointer, then the following property can be used +to specify the known alignment of the pointer. This allows the implementation to avoid a dynamic alignment check. + +```c++ +namespace sycl::ext::oneapi::experimental { +struct alignment_key { + template + using value_t = property_value>; +}; + +template +inline constexpr alignment_key::value_t alignment; +} // namespace sycl::ext::oneapi::experimental +``` + == Usage Example Example shows the simplest case without local memory usage of blocked load @@ -458,8 +490,8 @@ constexpr std::size_t block_count = 2; constexpr std::size_t size = block_count * block_size * items_per_thread; sycl::queue q; -T* input = sycl::malloc_device(size, q); -T* output = sycl::malloc_device(size, q); +T* input = sycl::aligned_alloc_device(16, size, q); +T* output = sycl::aligned_alloc_device(16, size, q); q.submit([&](sycl::handler& cgh) { cgh.parallel_for( @@ -472,7 +504,7 @@ q.submit([&](sycl::handler& cgh) { auto offset = g.get_group_id(0) * g.get_local_range(0) * items_per_thread; - auto props = sycl_exp::properties{sycl_exp::contiguous_memory}; + auto props = sycl_exp::properties{sycl_exp::contiguous_memory, sycl_exp::alignment<16>}; sycl_exp::group_load(g, input + offset, sycl::span{ data }, props); diff --git a/sycl/include/sycl/ext/oneapi/experimental/group_load_store.hpp b/sycl/include/sycl/ext/oneapi/experimental/group_load_store.hpp index 661ebd7e97895..48542c8b66036 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/group_load_store.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/group_load_store.hpp @@ -10,6 +10,7 @@ #pragma once +#include #include #include #include @@ -255,25 +256,29 @@ constexpr auto get_block_op_ptr(IteratorT iter, } } -template -bool is_aligned(IteratorType iter) { +template +bool is_aligned(IteratorType iter, [[maybe_unused]] Properties props) { using value_type = remove_decoration_t< typename std::iterator_traits::value_type>; + + if constexpr
(Properties::template has_property()) { + if (Properties::template get_property().value >= + RequiredAlign) + return true; + } + return alignof(value_type) >= RequiredAlign || reinterpret_cast(&*iter) % RequiredAlign == 0; } -} // namespace detail - -// Load API span overload. template std::enable_if_t && detail::is_generic_group_v && is_property_list_v> -group_load(Group g, InputIteratorT in_ptr, - span out, Properties props = {}) { +group_load_impl(Group g, InputIteratorT in_ptr, + span out, Properties props = {}) { constexpr bool blocked = detail::isBlocked(props); using use_naive = detail::merged_properties_t) { - return group_load(g, in_ptr, out, use_naive{}); + return group_load_impl(g, in_ptr, out, use_naive{}); } else { auto ptr = detail::get_block_op_ptr(in_ptr, props); static constexpr auto deduced_address_space = @@ -297,12 +302,12 @@ group_load(Group g, InputIteratorT in_ptr, access::address_space::generic_space) { if (auto local_ptr = detail::dynamic_address_cast< access::address_space::local_space>(ptr)) { - return group_load(g, local_ptr, out, props); + return group_load_impl(g, local_ptr, out, props); } else if (auto global_ptr = detail::dynamic_address_cast< access::address_space::global_space>(ptr)) { - return group_load(g, global_ptr, out, props); + return group_load_impl(g, global_ptr, out, props); } else { - return group_load(g, in_ptr, out, use_naive{}); + return group_load_impl(g, in_ptr, out, use_naive{}); } } else { using value_type = remove_decoration_t< @@ -314,8 +319,8 @@ group_load(Group g, InputIteratorT in_ptr, constexpr int ReqAlign = detail::RequiredAlignment::value; - if (!detail::is_aligned(in_ptr)) - return group_load(g, in_ptr, out, use_naive{}); + if (!detail::is_aligned(in_ptr, props)) + return group_load_impl(g, in_ptr, out, use_naive{}); // We know the pointer is aligned and the address space is known. Do the // optimized load. 
@@ -353,20 +358,21 @@ group_load(Group g, InputIteratorT in_ptr, } } } else { - return group_load(g, in_ptr, out, use_naive{}); + return group_load_impl(g, in_ptr, out, use_naive{}); } + + return; } } -// Store API span overload. template std::enable_if_t && detail::is_generic_group_v && is_property_list_v> -group_store(Group g, const span in, - OutputIteratorT out_ptr, Properties props = {}) { +group_store_impl(Group g, const span in, + OutputIteratorT out_ptr, Properties props = {}) { constexpr bool blocked = detail::isBlocked(props); using use_naive = detail::merged_properties_t in, group_barrier(g); return; } else if constexpr (!std::is_same_v) { - return group_store(g, in, out_ptr, use_naive{}); + return group_store_impl(g, in, out_ptr, use_naive{}); } else { auto ptr = detail::get_block_op_ptr(out_ptr, props); @@ -390,12 +396,12 @@ group_store(Group g, const span in, access::address_space::generic_space) { if (auto local_ptr = detail::dynamic_address_cast< access::address_space::local_space>(ptr)) { - return group_store(g, in, local_ptr, props); + return group_store_impl(g, in, local_ptr, props); } else if (auto global_ptr = detail::dynamic_address_cast< access::address_space::global_space>(ptr)) { - return group_store(g, in, global_ptr, props); + return group_store_impl(g, in, global_ptr, props); } else { - return group_store(g, in, out_ptr, use_naive{}); + return group_store_impl(g, in, out_ptr, use_naive{}); } } else { using block_info = typename detail::BlockTypeInfo< @@ -406,8 +412,8 @@ group_store(Group g, const span in, constexpr int ReqAlign = detail::RequiredAlignment::value; - if (!detail::is_aligned(out_ptr)) - return group_store(g, in, out_ptr, use_naive{}); + if (!detail::is_aligned(out_ptr, props)) + return group_store_impl(g, in, out_ptr, use_naive{}); std::remove_const_t::value_type>> @@ -424,10 +430,41 @@ group_store(Group g, const span in, sycl::bit_cast(values)); } } else { - return group_store(g, in, out_ptr, use_naive{}); + return 
group_store_impl(g, in, out_ptr, use_naive{}); } } } +} // namespace detail + +// Load API span overload. +template +std::enable_if_t && + detail::is_generic_group_v && + is_property_list_v> +group_load(Group g, InputIteratorT in_ptr, + span out, Properties props = {}) { + static_assert(std::is_pointer_v || + !Properties::template has_property(), + "group_load requires a pointer if alignment property is used"); + detail::group_load_impl(g, in_ptr, out, props); +} + +// Store API span overload. +template +std::enable_if_t && + detail::is_generic_group_v && + is_property_list_v> +group_store(Group g, const span in, + OutputIteratorT out_ptr, Properties props = {}) { + static_assert(std::is_pointer_v || + !Properties::template has_property(), + "group_store requires a pointer if alignment property is used"); + detail::group_store_impl(g, in, out_ptr, props); +} // Load API scalar. template && is_property_list_v> group_load(Group g, InputIteratorT in_ptr, OutputT &out, Properties properties = {}) { - group_load(g, in_ptr, span(&out, 1), properties); + static_assert(std::is_pointer_v || + !Properties::template has_property(), + "group_load requires a pointer if alignment property is used"); + detail::group_load_impl(g, in_ptr, span(&out, 1), properties); } // Store API scalar. @@ -448,7 +488,11 @@ std::enable_if_t && is_property_list_v> group_store(Group g, const InputT &in, OutputIteratorT out_ptr, Properties properties = {}) { - group_store(g, span(&in, 1), out_ptr, properties); + static_assert(std::is_pointer_v || + !Properties::template has_property(), + "group_store requires a pointer if alignment property is used"); + detail::group_store_impl(g, span(&in, 1), out_ptr, + properties); } // Load API sycl::vec overload. 
@@ -459,7 +503,10 @@ std::enable_if_t && is_property_list_v> group_load(Group g, InputIteratorT in_ptr, sycl::vec &out, Properties properties = {}) { - group_load(g, in_ptr, span(&out[0], N), properties); + static_assert(std::is_pointer_v || + !Properties::template has_property(), + "group_load requires a pointer if alignment property is used"); + detail::group_load_impl(g, in_ptr, span(&out[0], N), properties); } // Store API sycl::vec overload. @@ -470,7 +517,11 @@ std::enable_if_t && is_property_list_v> group_store(Group g, const sycl::vec &in, OutputIteratorT out_ptr, Properties properties = {}) { - group_store(g, span(&in[0], N), out_ptr, properties); + static_assert(std::is_pointer_v || + !Properties::template has_property(), + "group_store requires a pointer if alignment property is used"); + detail::group_store_impl(g, span(&in[0], N), out_ptr, + properties); } #else diff --git a/sycl/test/check_device_code/group_load.cpp b/sycl/test/check_device_code/group_load.cpp index 5daa907a0a40e..30290db169c6c 100644 --- a/sycl/test/check_device_code/group_load.cpp +++ b/sycl/test/check_device_code/group_load.cpp @@ -80,8 +80,8 @@ SYCL_EXTERNAL void test_naive(sycl::sub_group &sg, plain_ptr p, int &out) { // CHECK-GLOBAL-NEXT: entry: // CHECK-GLOBAL-NEXT: [[CMP_I_I_I:%.*]] = icmp ne ptr addrspace(1) [[P:%.*]], null // CHECK-GLOBAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I_I]]) -// CHECK-GLOBAL-NEXT: [[CALL5_I_I:%.*]] = tail call spir_func noundef i32 @_Z30__spirv_SubgroupBlockReadINTELIjET_PU3AS1Kj(ptr addrspace(1) noundef nonnull [[P]]) #[[ATTR7]] -// CHECK-GLOBAL-NEXT: store i32 [[CALL5_I_I]], ptr addrspace(4) [[OUT:%.*]], align 4 +// CHECK-GLOBAL-NEXT: [[CALL6_I_I:%.*]] = tail call spir_func noundef i32 @_Z30__spirv_SubgroupBlockReadINTELIjET_PU3AS1Kj(ptr addrspace(1) noundef nonnull [[P]]) #[[ATTR7]] +// CHECK-GLOBAL-NEXT: store i32 [[CALL6_I_I]], ptr addrspace(4) [[OUT:%.*]], align 4 // CHECK-GLOBAL-NEXT: ret void // // CHECK-LOCAL-LABEL: 
@_ZN7blocked14test_optimizedERN4sycl3_V19sub_groupEPU3AS3iRi( @@ -90,8 +90,8 @@ SYCL_EXTERNAL void test_naive(sycl::sub_group &sg, plain_ptr p, int &out) { // CHECK-LOCAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I_I]]) // CHECK-LOCAL-NEXT: [[TMP0:%.*]] = ptrtoint ptr addrspace(3) [[P]] to i64 // CHECK-LOCAL-NEXT: [[REM_I_I_I:%.*]] = and i64 [[TMP0]], 15 -// CHECK-LOCAL-NEXT: [[CMP_I20_I_I:%.*]] = icmp eq i64 [[REM_I_I_I]], 0 -// CHECK-LOCAL-NEXT: br i1 [[CMP_I20_I_I]], label [[IF_END_I_I:%.*]], label [[IF_THEN_I_I:%.*]] +// CHECK-LOCAL-NEXT: [[CMP_I15_I_I:%.*]] = icmp eq i64 [[REM_I_I_I]], 0 +// CHECK-LOCAL-NEXT: br i1 [[CMP_I15_I_I]], label [[IF_END_I_I:%.*]], label [[IF_THEN_I_I:%.*]] // CHECK-LOCAL: if.then.i.i: // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR6]] // CHECK-LOCAL-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4 @@ -102,8 +102,8 @@ SYCL_EXTERNAL void test_naive(sycl::sub_group &sg, plain_ptr p, int &out) { // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR6]] // CHECK-LOCAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS3IINS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT2_EEVE4TYPEESR_SP_RSQ_SS__EXIT:%.*]] // CHECK-LOCAL: if.end.i.i: -// CHECK-LOCAL-NEXT: [[CALL5_I_I:%.*]] = tail call spir_func noundef i32 @_Z30__spirv_SubgroupBlockReadINTELIjET_PU3AS3Kj(ptr addrspace(3) noundef nonnull [[P]]) #[[ATTR6]] -// CHECK-LOCAL-NEXT: store i32 [[CALL5_I_I]], ptr addrspace(4) [[OUT]], align 4 +// 
CHECK-LOCAL-NEXT: [[CALL6_I_I:%.*]] = tail call spir_func noundef i32 @_Z30__spirv_SubgroupBlockReadINTELIjET_PU3AS3Kj(ptr addrspace(3) noundef nonnull [[P]]) #[[ATTR6]] +// CHECK-LOCAL-NEXT: store i32 [[CALL6_I_I]], ptr addrspace(4) [[OUT]], align 4 // CHECK-LOCAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS3IINS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT2_EEVE4TYPEESR_SP_RSQ_SS__EXIT]] // CHECK-LOCAL: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS3iiNS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT2_EEvE4typeESR_SP_RSQ_SS_.exit: // CHECK-LOCAL-NEXT: ret void @@ -118,8 +118,8 @@ SYCL_EXTERNAL void test_optimized(sycl::sub_group &sg, plain_ptr p, // CHECK-GLOBAL-NEXT: entry: // CHECK-GLOBAL-NEXT: [[CMP_I_I_I:%.*]] = icmp ne ptr addrspace(1) [[P:%.*]], null // CHECK-GLOBAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I_I]]) -// CHECK-GLOBAL-NEXT: [[CALL5_I_I:%.*]] = tail call spir_func noundef i32 @_Z30__spirv_SubgroupBlockReadINTELIjET_PU3AS1Kj(ptr addrspace(1) noundef nonnull [[P]]) #[[ATTR7]] -// CHECK-GLOBAL-NEXT: store i32 [[CALL5_I_I]], ptr addrspace(4) [[OUT:%.*]], align 4 +// CHECK-GLOBAL-NEXT: [[CALL6_I_I:%.*]] = tail call spir_func noundef i32 @_Z30__spirv_SubgroupBlockReadINTELIjET_PU3AS1Kj(ptr addrspace(1) noundef nonnull [[P]]) #[[ATTR7]] +// CHECK-GLOBAL-NEXT: store i32 
[[CALL6_I_I]], ptr addrspace(4) [[OUT:%.*]], align 4 // CHECK-GLOBAL-NEXT: ret void // // CHECK-LOCAL-LABEL: @_ZN7blocked27test_contiguous_auto_detectERN4sycl3_V19sub_groupEPU3AS3iRi( @@ -128,8 +128,8 @@ SYCL_EXTERNAL void test_optimized(sycl::sub_group &sg, plain_ptr p, // CHECK-LOCAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I_I]]) // CHECK-LOCAL-NEXT: [[TMP0:%.*]] = ptrtoint ptr addrspace(3) [[P]] to i64 // CHECK-LOCAL-NEXT: [[REM_I_I_I:%.*]] = and i64 [[TMP0]], 15 -// CHECK-LOCAL-NEXT: [[CMP_I20_I_I:%.*]] = icmp eq i64 [[REM_I_I_I]], 0 -// CHECK-LOCAL-NEXT: br i1 [[CMP_I20_I_I]], label [[IF_END_I_I:%.*]], label [[IF_THEN_I_I:%.*]] +// CHECK-LOCAL-NEXT: [[CMP_I15_I_I:%.*]] = icmp eq i64 [[REM_I_I_I]], 0 +// CHECK-LOCAL-NEXT: br i1 [[CMP_I15_I_I]], label [[IF_END_I_I:%.*]], label [[IF_THEN_I_I:%.*]] // CHECK-LOCAL: if.then.i.i: // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR6]] // CHECK-LOCAL-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4 @@ -140,8 +140,8 @@ SYCL_EXTERNAL void test_optimized(sycl::sub_group &sg, plain_ptr p, // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR6]] // CHECK-LOCAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS3IINS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT2_EEVE4TYPEESP_SN_RSO_SQ__EXIT:%.*]] // CHECK-LOCAL: if.end.i.i: -// CHECK-LOCAL-NEXT: [[CALL5_I_I:%.*]] = tail call spir_func noundef i32 @_Z30__spirv_SubgroupBlockReadINTELIjET_PU3AS3Kj(ptr addrspace(3) noundef nonnull [[P]]) #[[ATTR6]] -// 
CHECK-LOCAL-NEXT: store i32 [[CALL5_I_I]], ptr addrspace(4) [[OUT]], align 4 +// CHECK-LOCAL-NEXT: [[CALL6_I_I:%.*]] = tail call spir_func noundef i32 @_Z30__spirv_SubgroupBlockReadINTELIjET_PU3AS3Kj(ptr addrspace(3) noundef nonnull [[P]]) #[[ATTR6]] +// CHECK-LOCAL-NEXT: store i32 [[CALL6_I_I]], ptr addrspace(4) [[OUT]], align 4 // CHECK-LOCAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS3IINS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT2_EEVE4TYPEESP_SN_RSO_SQ__EXIT]] // CHECK-LOCAL: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS3iiNS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT2_EEvE4typeESP_SN_RSO_SQ_.exit: // CHECK-LOCAL-NEXT: ret void @@ -185,7 +185,7 @@ using accessor_iter_t = local_accessor::iterator; // CHECK-LOCAL-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[AGG_TMP1_I]]) // CHECK-LOCAL-NEXT: call void @llvm.lifetime.start.p0(i64 3, ptr nonnull [[AGG_TMP2_I]]) // CHECK-LOCAL-NEXT: store ptr addrspace(4) [[OUT:%.*]], ptr [[AGG_TMP1_I]], align 8, !tbaa [[TBAA18:![0-9]+]] -// CHECK-LOCAL-NEXT: tail call spir_func void 
@_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPiiLm1ENS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSA_INS3_14full_group_keyEJEEENSA_INS8_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESO_SM_NS0_4spanISN_XT2_EEESP_(ptr noundef nonnull byval(%"struct.sycl::_V1::sub_group") align 1 [[AGG_TMP_I]], ptr addrspace(4) noundef [[TMP0]], ptr noundef nonnull byval(%"class.sycl::_V1::span") align 8 [[AGG_TMP1_I]], ptr noundef nonnull byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.15") align 1 [[AGG_TMP2_I]]) #[[ATTR6]] +// CHECK-LOCAL-NEXT: tail call spir_func void @_ZN4sycl3_V13ext6oneapi12experimental6detail15group_load_implINS0_9sub_groupEPiiLm1ENS3_10propertiesINS4_20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSA_INS3_14full_group_keyEJEEENSA_INS4_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESO_SM_NS0_4spanISN_XT2_EEESP_(ptr noundef nonnull byval(%"struct.sycl::_V1::sub_group") align 1 [[AGG_TMP_I]], ptr addrspace(4) noundef [[TMP0]], ptr noundef nonnull byval(%"class.sycl::_V1::span") align 8 [[AGG_TMP1_I]], ptr noundef nonnull byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.15") align 1 [[AGG_TMP2_I]]) #[[ATTR6]] // CHECK-LOCAL-NEXT: call void @llvm.lifetime.end.p0(i64 1, ptr nonnull [[AGG_TMP_I]]) // CHECK-LOCAL-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[AGG_TMP1_I]]) // CHECK-LOCAL-NEXT: call void @llvm.lifetime.end.p0(i64 3, ptr nonnull [[AGG_TMP2_I]]) @@ -209,7 +209,7 @@ SYCL_EXTERNAL void test_accessor_iter(sycl::sub_group &sg, // CHECK-GLOBAL-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[AGG_TMP3_I]]) // 
CHECK-GLOBAL-NEXT: call void @llvm.memcpy.p0.p4.i64(ptr noundef nonnull align 8 dereferenceable(80) [[AGG_TMP14]], ptr addrspace(4) noundef align 8 dereferenceable(80) [[ITER:%.*]], i64 80, i1 false) // CHECK-GLOBAL-NEXT: store ptr addrspace(4) [[OUT:%.*]], ptr [[AGG_TMP2_I]], align 8, !tbaa [[TBAA21:![0-9]+]] -// CHECK-GLOBAL-NEXT: tail call spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupENS0_6detail17accessor_iteratorIKiLi1EEEiLm1ENS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSD_INS3_21contiguous_memory_keyEJEEENSD_INS3_14full_group_keyEJEEENSD_INSB_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeEST_SR_NS0_4spanISS_XT2_EEESU_(ptr noundef nonnull byval(%"struct.sycl::_V1::sub_group") align 1 [[AGG_TMP_I]], ptr noundef nonnull byval(%"class.sycl::_V1::detail::accessor_iterator") align 8 [[AGG_TMP14]], ptr noundef nonnull byval(%"class.sycl::_V1::span") align 8 [[AGG_TMP2_I]], ptr noundef nonnull byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.7") align 1 [[AGG_TMP3_I]]) #[[ATTR7]] +// CHECK-GLOBAL-NEXT: tail call spir_func void @_ZN4sycl3_V13ext6oneapi12experimental6detail15group_load_implINS0_9sub_groupENS0_6detail17accessor_iteratorIKiLi1EEEiLm1ENS3_10propertiesINS4_20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSD_INS3_21contiguous_memory_keyEJEEENSD_INS3_14full_group_keyEJEEENSD_INS4_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeEST_SR_NS0_4spanISS_XT2_EEESU_(ptr noundef nonnull byval(%"struct.sycl::_V1::sub_group") align 1 [[AGG_TMP_I]], ptr noundef nonnull byval(%"class.sycl::_V1::detail::accessor_iterator") align 8 [[AGG_TMP14]], ptr noundef 
nonnull byval(%"class.sycl::_V1::span") align 8 [[AGG_TMP2_I]], ptr noundef nonnull byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.7") align 1 [[AGG_TMP3_I]]) #[[ATTR7]] // CHECK-GLOBAL-NEXT: call void @llvm.lifetime.end.p0(i64 80, ptr nonnull [[AGG_TMP14]]) // CHECK-GLOBAL-NEXT: call void @llvm.lifetime.end.p0(i64 1, ptr nonnull [[AGG_TMP_I]]) // CHECK-GLOBAL-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[AGG_TMP2_I]]) @@ -226,7 +226,7 @@ SYCL_EXTERNAL void test_accessor_iter(sycl::sub_group &sg, // CHECK-LOCAL-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[AGG_TMP1_I]]) // CHECK-LOCAL-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[AGG_TMP2_I]]) // CHECK-LOCAL-NEXT: store ptr addrspace(4) [[OUT:%.*]], ptr [[AGG_TMP1_I]], align 8, !tbaa [[TBAA18]] -// CHECK-LOCAL-NEXT: tail call spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPiiLm1ENS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEENSA_INS8_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESQ_SO_NS0_4spanISP_XT2_EEESR_(ptr noundef nonnull byval(%"struct.sycl::_V1::sub_group") align 1 [[AGG_TMP_I]], ptr addrspace(4) noundef [[TMP0]], ptr noundef nonnull byval(%"class.sycl::_V1::span") align 8 [[AGG_TMP1_I]], ptr noundef nonnull byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.7") align 1 [[AGG_TMP2_I]]) #[[ATTR6]] +// CHECK-LOCAL-NEXT: tail call spir_func void 
@_ZN4sycl3_V13ext6oneapi12experimental6detail15group_load_implINS0_9sub_groupEPiiLm1ENS3_10propertiesINS4_20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEENSA_INS4_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESQ_SO_NS0_4spanISP_XT2_EEESR_(ptr noundef nonnull byval(%"struct.sycl::_V1::sub_group") align 1 [[AGG_TMP_I]], ptr addrspace(4) noundef [[TMP0]], ptr noundef nonnull byval(%"class.sycl::_V1::span") align 8 [[AGG_TMP1_I]], ptr noundef nonnull byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.7") align 1 [[AGG_TMP2_I]]) #[[ATTR6]] // CHECK-LOCAL-NEXT: call void @llvm.lifetime.end.p0(i64 1, ptr nonnull [[AGG_TMP_I]]) // CHECK-LOCAL-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[AGG_TMP1_I]]) // CHECK-LOCAL-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[AGG_TMP2_I]]) @@ -245,8 +245,8 @@ SYCL_EXTERNAL void test_accessor_iter_force_optimized(sycl::sub_group &sg, // CHECK-GLOBAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I_I]]) // CHECK-GLOBAL-NEXT: [[TMP0:%.*]] = ptrtoint ptr addrspace(1) [[P]] to i64 // CHECK-GLOBAL-NEXT: [[REM_I_I_I:%.*]] = and i64 [[TMP0]], 3 -// CHECK-GLOBAL-NEXT: [[CMP_I20_I_I:%.*]] = icmp eq i64 [[REM_I_I_I]], 0 -// CHECK-GLOBAL-NEXT: br i1 [[CMP_I20_I_I]], label [[IF_END_I_I:%.*]], label [[IF_THEN_I_I:%.*]] +// CHECK-GLOBAL-NEXT: [[CMP_I15_I_I:%.*]] = icmp eq i64 [[REM_I_I_I]], 0 +// CHECK-GLOBAL-NEXT: br i1 [[CMP_I15_I_I]], label [[IF_END_I_I:%.*]], label [[IF_THEN_I_I:%.*]] // CHECK-GLOBAL: if.then.i.i: // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // CHECK-GLOBAL-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4 @@ -257,8 +257,8 @@ 
SYCL_EXTERNAL void test_accessor_iter_force_optimized(sycl::sub_group &sg, // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // CHECK-GLOBAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1CCNS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT2_EEVE4TYPEESR_SP_RSQ_SS__EXIT:%.*]] // CHECK-GLOBAL: if.end.i.i: -// CHECK-GLOBAL-NEXT: [[CALL5_I_I:%.*]] = tail call spir_func noundef zeroext i8 @_Z30__spirv_SubgroupBlockReadINTELIhET_PU3AS1Kh(ptr addrspace(1) noundef nonnull [[P]]) #[[ATTR7]] -// CHECK-GLOBAL-NEXT: store i8 [[CALL5_I_I]], ptr addrspace(4) [[OUT]], align 1 +// CHECK-GLOBAL-NEXT: [[CALL6_I_I:%.*]] = tail call spir_func noundef zeroext i8 @_Z30__spirv_SubgroupBlockReadINTELIhET_PU3AS1Kh(ptr addrspace(1) noundef nonnull [[P]]) #[[ATTR7]] +// CHECK-GLOBAL-NEXT: store i8 [[CALL6_I_I]], ptr addrspace(4) [[OUT]], align 1 // CHECK-GLOBAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1CCNS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT2_EEVE4TYPEESR_SP_RSQ_SS__EXIT]] // CHECK-GLOBAL: 
_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1ccNS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT2_EEvE4typeESR_SP_RSQ_SS_.exit: // CHECK-GLOBAL-NEXT: ret void @@ -269,8 +269,8 @@ SYCL_EXTERNAL void test_accessor_iter_force_optimized(sycl::sub_group &sg, // CHECK-LOCAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I_I]]) // CHECK-LOCAL-NEXT: [[TMP0:%.*]] = ptrtoint ptr addrspace(3) [[P]] to i64 // CHECK-LOCAL-NEXT: [[REM_I_I_I:%.*]] = and i64 [[TMP0]], 15 -// CHECK-LOCAL-NEXT: [[CMP_I20_I_I:%.*]] = icmp eq i64 [[REM_I_I_I]], 0 -// CHECK-LOCAL-NEXT: br i1 [[CMP_I20_I_I]], label [[IF_END_I_I:%.*]], label [[IF_THEN_I_I:%.*]] +// CHECK-LOCAL-NEXT: [[CMP_I15_I_I:%.*]] = icmp eq i64 [[REM_I_I_I]], 0 +// CHECK-LOCAL-NEXT: br i1 [[CMP_I15_I_I]], label [[IF_END_I_I:%.*]], label [[IF_THEN_I_I:%.*]] // CHECK-LOCAL: if.then.i.i: // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR6]] // CHECK-LOCAL-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4 @@ -281,8 +281,8 @@ SYCL_EXTERNAL void test_accessor_iter_force_optimized(sycl::sub_group &sg, // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR6]] // CHECK-LOCAL-NEXT: br label 
[[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS3CCNS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT2_EEVE4TYPEESR_SP_RSQ_SS__EXIT:%.*]] // CHECK-LOCAL: if.end.i.i: -// CHECK-LOCAL-NEXT: [[CALL5_I_I:%.*]] = tail call spir_func noundef zeroext i8 @_Z30__spirv_SubgroupBlockReadINTELIhET_PU3AS3Kh(ptr addrspace(3) noundef nonnull [[P]]) #[[ATTR6]] -// CHECK-LOCAL-NEXT: store i8 [[CALL5_I_I]], ptr addrspace(4) [[OUT]], align 1 +// CHECK-LOCAL-NEXT: [[CALL6_I_I:%.*]] = tail call spir_func noundef zeroext i8 @_Z30__spirv_SubgroupBlockReadINTELIhET_PU3AS3Kh(ptr addrspace(3) noundef nonnull [[P]]) #[[ATTR6]] +// CHECK-LOCAL-NEXT: store i8 [[CALL6_I_I]], ptr addrspace(4) [[OUT]], align 1 // CHECK-LOCAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS3CCNS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT2_EEVE4TYPEESR_SP_RSQ_SS__EXIT]] // CHECK-LOCAL: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS3ccNS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT2_EEvE4typeESR_SP_RSQ_SS_.exit: // 
CHECK-LOCAL-NEXT: ret void @@ -298,37 +298,37 @@ SYCL_EXTERNAL void test_runtime_align_check(sycl::sub_group &sg, // CHECK-GLOBAL-NEXT: entry: // CHECK-GLOBAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT:%.*]], align 8, !tbaa [[TBAA27:![0-9]+]] // CHECK-GLOBAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) -// CHECK-GLOBAL-NEXT: [[CMP_I_I:%.*]] = icmp ne ptr addrspace(1) [[P:%.*]], null -// CHECK-GLOBAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I]]) +// CHECK-GLOBAL-NEXT: [[CMP_I_I_I:%.*]] = icmp ne ptr addrspace(1) [[P:%.*]], null +// CHECK-GLOBAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I_I]]) // CHECK-GLOBAL-NEXT: [[TMP2:%.*]] = ptrtoint ptr addrspace(1) [[P]] to i64 -// CHECK-GLOBAL-NEXT: [[REM_I_I:%.*]] = and i64 [[TMP2]], 3 -// CHECK-GLOBAL-NEXT: [[CMP_I20_I:%.*]] = icmp eq i64 [[REM_I_I]], 0 -// CHECK-GLOBAL-NEXT: br i1 [[CMP_I20_I]], label [[IF_END_I:%.*]], label [[IF_THEN_I:%.*]] -// CHECK-GLOBAL: if.then.i: +// CHECK-GLOBAL-NEXT: [[REM_I_I_I:%.*]] = and i64 [[TMP2]], 3 +// CHECK-GLOBAL-NEXT: [[CMP_I15_I_I:%.*]] = icmp eq i64 [[REM_I_I_I]], 0 +// CHECK-GLOBAL-NEXT: br i1 [[CMP_I15_I_I]], label [[IF_END_I_I:%.*]], label [[IF_THEN_I_I:%.*]] +// CHECK-GLOBAL: if.then.i.i: // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // CHECK-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META29:![0-9]+]] -// CHECK-GLOBAL-NEXT: [[MUL_I_I_I:%.*]] = shl i32 [[TMP3]], 2 -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I:%.*]] -// CHECK-GLOBAL: for.cond.i.i: -// CHECK-GLOBAL-NEXT: [[I_0_I_I:%.*]] = phi i32 [ 0, [[IF_THEN_I]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] -// CHECK-GLOBAL-NEXT: [[CMP_I24_I:%.*]] = icmp samesign ult i32 [[I_0_I_I]], 4 -// CHECK-GLOBAL-NEXT: br i1 [[CMP_I24_I]], label [[FOR_BODY_I_I]], label 
[[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1SSLM4ENS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_9NAIVE_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEEST_SR_NS0_4SPANISS_XT2_EEESU__EXIT_I:%.*]] -// CHECK-GLOBAL: for.body.i.i: -// CHECK-GLOBAL-NEXT: [[CONV_I_I:%.*]] = zext nneg i32 [[I_0_I_I]] to i64 -// CHECK-GLOBAL-NEXT: [[ADD_I_I_I:%.*]] = or disjoint i32 [[MUL_I_I_I]], [[I_0_I_I]] -// CHECK-GLOBAL-NEXT: [[IDXPROM_I_I:%.*]] = sext i32 [[ADD_I_I_I]] to i64 -// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[P]], i64 [[IDXPROM_I_I]] -// CHECK-GLOBAL-NEXT: [[TMP4:%.*]] = load i16, ptr addrspace(1) [[ARRAYIDX_I_I]], align 2, !tbaa [[TBAA32:![0-9]+]] -// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds nuw i16, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I]] -// CHECK-GLOBAL-NEXT: store i16 [[TMP4]], ptr addrspace(4) [[ARRAYIDX_I_I_I]], align 2, !tbaa [[TBAA32]] -// CHECK-GLOBAL-NEXT: [[INC_I_I]] = add nuw nsw i32 [[I_0_I_I]], 1 -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP34:![0-9]+]] -// CHECK-GLOBAL: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1ssLm4ENS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_9naive_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeEST_SR_NS0_4spanISS_XT2_EEESU_.exit.i: +// CHECK-GLOBAL-NEXT: [[MUL_I_I_I_I:%.*]] = shl i32 [[TMP3]], 2 +// 
CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] +// CHECK-GLOBAL: for.cond.i.i.i: +// CHECK-GLOBAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[IF_THEN_I_I]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] +// CHECK-GLOBAL-NEXT: [[CMP_I19_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I_I]], 4 +// CHECK-GLOBAL-NEXT: br i1 [[CMP_I19_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL6DETAIL15GROUP_LOAD_IMPLINS0_9SUB_GROUPEPU3AS1SSLM4ENS3_10PROPERTIESINS4_20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS4_9NAIVE_KEYEJEEENSB_INS4_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEEST_SR_NS0_4SPANISS_XT2_EEESU__EXIT_I_I:%.*]] +// CHECK-GLOBAL: for.body.i.i.i: +// CHECK-GLOBAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 +// CHECK-GLOBAL-NEXT: [[ADD_I_I_I_I:%.*]] = or disjoint i32 [[MUL_I_I_I_I]], [[I_0_I_I_I]] +// CHECK-GLOBAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 +// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[P]], i64 [[IDXPROM_I_I_I]] +// CHECK-GLOBAL-NEXT: [[TMP4:%.*]] = load i16, ptr addrspace(1) [[ARRAYIDX_I_I_I]], align 2, !tbaa [[TBAA32:![0-9]+]] +// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i16, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] +// CHECK-GLOBAL-NEXT: store i16 [[TMP4]], ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 2, !tbaa [[TBAA32]] +// CHECK-GLOBAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 +// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP34:![0-9]+]] +// CHECK-GLOBAL: 
_ZN4sycl3_V13ext6oneapi12experimental6detail15group_load_implINS0_9sub_groupEPU3AS1ssLm4ENS3_10propertiesINS4_20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS4_9naive_keyEJEEENSB_INS4_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeEST_SR_NS0_4spanISS_XT2_EEESU_.exit.i.i: // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // CHECK-GLOBAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1SSLM4ENS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEESS__EXIT:%.*]] -// CHECK-GLOBAL: if.end.i: -// CHECK-GLOBAL-NEXT: [[CALL5_I:%.*]] = tail call spir_func noundef i64 @_Z30__spirv_SubgroupBlockReadINTELImET_PU3AS1Km(ptr addrspace(1) noundef nonnull [[P]]) #[[ATTR7]] -// CHECK-GLOBAL-NEXT: store i64 [[CALL5_I]], ptr addrspace(4) [[TMP1]], align 2 +// CHECK-GLOBAL: if.end.i.i: +// CHECK-GLOBAL-NEXT: [[CALL6_I_I:%.*]] = tail call spir_func noundef i64 @_Z30__spirv_SubgroupBlockReadINTELImET_PU3AS1Km(ptr addrspace(1) noundef nonnull [[P]]) #[[ATTR7]] +// CHECK-GLOBAL-NEXT: store i64 [[CALL6_I_I]], ptr addrspace(4) [[TMP1]], align 2 // CHECK-GLOBAL-NEXT: br label 
[[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1SSLM4ENS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEESS__EXIT]] // CHECK-GLOBAL: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1ssLm4ENS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_SP_NS0_4spanISQ_XT2_EEESS_.exit: // CHECK-GLOBAL-NEXT: ret void @@ -337,37 +337,37 @@ SYCL_EXTERNAL void test_runtime_align_check(sycl::sub_group &sg, // CHECK-LOCAL-NEXT: entry: // CHECK-LOCAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT:%.*]], align 8, !tbaa [[TBAA25:![0-9]+]] // CHECK-LOCAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) -// CHECK-LOCAL-NEXT: [[CMP_I_I:%.*]] = icmp ne ptr addrspace(3) [[P:%.*]], null -// CHECK-LOCAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I]]) +// CHECK-LOCAL-NEXT: [[CMP_I_I_I:%.*]] = icmp ne ptr addrspace(3) [[P:%.*]], null +// CHECK-LOCAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I_I]]) // CHECK-LOCAL-NEXT: [[TMP2:%.*]] = ptrtoint ptr addrspace(3) [[P]] to i64 -// CHECK-LOCAL-NEXT: [[REM_I_I:%.*]] = and i64 [[TMP2]], 15 -// CHECK-LOCAL-NEXT: [[CMP_I20_I:%.*]] = icmp eq i64 [[REM_I_I]], 0 -// CHECK-LOCAL-NEXT: br i1 [[CMP_I20_I]], label [[IF_END_I:%.*]], label [[IF_THEN_I:%.*]] -// CHECK-LOCAL: if.then.i: +// CHECK-LOCAL-NEXT: [[REM_I_I_I:%.*]] = and i64 [[TMP2]], 15 +// 
CHECK-LOCAL-NEXT: [[CMP_I15_I_I:%.*]] = icmp eq i64 [[REM_I_I_I]], 0 +// CHECK-LOCAL-NEXT: br i1 [[CMP_I15_I_I]], label [[IF_END_I_I:%.*]], label [[IF_THEN_I_I:%.*]] +// CHECK-LOCAL: if.then.i.i: // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR6]] // CHECK-LOCAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META27:![0-9]+]] -// CHECK-LOCAL-NEXT: [[MUL_I_I_I:%.*]] = shl i32 [[TMP3]], 2 -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I:%.*]] -// CHECK-LOCAL: for.cond.i.i: -// CHECK-LOCAL-NEXT: [[I_0_I_I:%.*]] = phi i32 [ 0, [[IF_THEN_I]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] -// CHECK-LOCAL-NEXT: [[CMP_I24_I:%.*]] = icmp samesign ult i32 [[I_0_I_I]], 4 -// CHECK-LOCAL-NEXT: br i1 [[CMP_I24_I]], label [[FOR_BODY_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS3SSLM4ENS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_9NAIVE_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEEST_SR_NS0_4SPANISS_XT2_EEESU__EXIT_I:%.*]] -// CHECK-LOCAL: for.body.i.i: -// CHECK-LOCAL-NEXT: [[CONV_I_I:%.*]] = zext nneg i32 [[I_0_I_I]] to i64 -// CHECK-LOCAL-NEXT: [[ADD_I_I_I:%.*]] = or disjoint i32 [[MUL_I_I_I]], [[I_0_I_I]] -// CHECK-LOCAL-NEXT: [[IDXPROM_I_I:%.*]] = sext i32 [[ADD_I_I_I]] to i64 -// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i16, ptr addrspace(3) [[P]], i64 [[IDXPROM_I_I]] -// CHECK-LOCAL-NEXT: [[TMP4:%.*]] = load i16, ptr addrspace(3) [[ARRAYIDX_I_I]], align 2, !tbaa [[TBAA30:![0-9]+]] -// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds nuw i16, ptr 
addrspace(4) [[TMP1]], i64 [[CONV_I_I]] -// CHECK-LOCAL-NEXT: store i16 [[TMP4]], ptr addrspace(4) [[ARRAYIDX_I_I_I]], align 2, !tbaa [[TBAA30]] -// CHECK-LOCAL-NEXT: [[INC_I_I]] = add nuw nsw i32 [[I_0_I_I]], 1 -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP32:![0-9]+]] -// CHECK-LOCAL: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS3ssLm4ENS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_9naive_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeEST_SR_NS0_4spanISS_XT2_EEESU_.exit.i: +// CHECK-LOCAL-NEXT: [[MUL_I_I_I_I:%.*]] = shl i32 [[TMP3]], 2 +// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] +// CHECK-LOCAL: for.cond.i.i.i: +// CHECK-LOCAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[IF_THEN_I_I]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] +// CHECK-LOCAL-NEXT: [[CMP_I19_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I_I]], 4 +// CHECK-LOCAL-NEXT: br i1 [[CMP_I19_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL6DETAIL15GROUP_LOAD_IMPLINS0_9SUB_GROUPEPU3AS3SSLM4ENS3_10PROPERTIESINS4_20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS4_9NAIVE_KEYEJEEENSB_INS4_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEEST_SR_NS0_4SPANISS_XT2_EEESU__EXIT_I_I:%.*]] +// CHECK-LOCAL: for.body.i.i.i: +// CHECK-LOCAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 +// CHECK-LOCAL-NEXT: [[ADD_I_I_I_I:%.*]] = or disjoint i32 [[MUL_I_I_I_I]], [[I_0_I_I_I]] +// CHECK-LOCAL-NEXT: 
[[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 +// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i16, ptr addrspace(3) [[P]], i64 [[IDXPROM_I_I_I]] +// CHECK-LOCAL-NEXT: [[TMP4:%.*]] = load i16, ptr addrspace(3) [[ARRAYIDX_I_I_I]], align 2, !tbaa [[TBAA30:![0-9]+]] +// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i16, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] +// CHECK-LOCAL-NEXT: store i16 [[TMP4]], ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 2, !tbaa [[TBAA30]] +// CHECK-LOCAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 +// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP32:![0-9]+]] +// CHECK-LOCAL: _ZN4sycl3_V13ext6oneapi12experimental6detail15group_load_implINS0_9sub_groupEPU3AS3ssLm4ENS3_10propertiesINS4_20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS4_9naive_keyEJEEENSB_INS4_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeEST_SR_NS0_4spanISS_XT2_EEESU_.exit.i.i: // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR6]] // CHECK-LOCAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS3SSLM4ENS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEESS__EXIT:%.*]] -// CHECK-LOCAL: if.end.i: -// CHECK-LOCAL-NEXT: [[CALL5_I:%.*]] = tail call spir_func noundef i64 
@_Z30__spirv_SubgroupBlockReadINTELImET_PU3AS3Km(ptr addrspace(3) noundef nonnull [[P]]) #[[ATTR6]] -// CHECK-LOCAL-NEXT: store i64 [[CALL5_I]], ptr addrspace(4) [[TMP1]], align 2 +// CHECK-LOCAL: if.end.i.i: +// CHECK-LOCAL-NEXT: [[CALL6_I_I:%.*]] = tail call spir_func noundef i64 @_Z30__spirv_SubgroupBlockReadINTELImET_PU3AS3Km(ptr addrspace(3) noundef nonnull [[P]]) #[[ATTR6]] +// CHECK-LOCAL-NEXT: store i64 [[CALL6_I_I]], ptr addrspace(4) [[TMP1]], align 2 // CHECK-LOCAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS3SSLM4ENS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEESS__EXIT]] // CHECK-LOCAL: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS3ssLm4ENS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_SP_NS0_4spanISQ_XT2_EEESS_.exit: // CHECK-LOCAL-NEXT: ret void @@ -385,22 +385,22 @@ SYCL_EXTERNAL void test_four_shorts(sycl::sub_group &sg, plain_ptr p, // CHECK-GLOBAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // CHECK-GLOBAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META37:![0-9]+]] -// CHECK-GLOBAL-NEXT: 
[[MUL_I_I_I:%.*]] = mul i32 [[TMP2]], 3 -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I:%.*]] -// CHECK-GLOBAL: for.cond.i.i: -// CHECK-GLOBAL-NEXT: [[I_0_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] -// CHECK-GLOBAL-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I]], 3 -// CHECK-GLOBAL-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1IILM3ENS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEESS__EXIT:%.*]] -// CHECK-GLOBAL: for.body.i.i: -// CHECK-GLOBAL-NEXT: [[CONV_I_I:%.*]] = zext nneg i32 [[I_0_I_I]] to i64 -// CHECK-GLOBAL-NEXT: [[ADD_I_I_I:%.*]] = add i32 [[MUL_I_I_I]], [[I_0_I_I]] -// CHECK-GLOBAL-NEXT: [[IDXPROM_I_I:%.*]] = sext i32 [[ADD_I_I_I]] to i64 -// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[P:%.*]], i64 [[IDXPROM_I_I]] -// CHECK-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I]] -// CHECK-GLOBAL-NEXT: store i32 [[TMP3]], ptr addrspace(4) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-GLOBAL-NEXT: [[INC_I_I]] = add nuw nsw i32 [[I_0_I_I]], 1 -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP40:![0-9]+]] +// CHECK-GLOBAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul i32 [[TMP2]], 3 +// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] +// CHECK-GLOBAL: for.cond.i.i.i: +// CHECK-GLOBAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I_I:%.*]], 
[[FOR_BODY_I_I_I:%.*]] ] +// CHECK-GLOBAL-NEXT: [[CMP_I_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I_I]], 3 +// CHECK-GLOBAL-NEXT: br i1 [[CMP_I_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1IILM3ENS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEESS__EXIT:%.*]] +// CHECK-GLOBAL: for.body.i.i.i: +// CHECK-GLOBAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 +// CHECK-GLOBAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[MUL_I_I_I_I]], [[I_0_I_I_I]] +// CHECK-GLOBAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 +// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[P:%.*]], i64 [[IDXPROM_I_I_I]] +// CHECK-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] +// CHECK-GLOBAL-NEXT: store i32 [[TMP3]], ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-GLOBAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 +// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK-GLOBAL: 
_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiLm3ENS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_SP_NS0_4spanISQ_XT2_EEESS_.exit: // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // CHECK-GLOBAL-NEXT: ret void @@ -411,22 +411,22 @@ SYCL_EXTERNAL void test_four_shorts(sycl::sub_group &sg, plain_ptr p, // CHECK-LOCAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR6]] // CHECK-LOCAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META35:![0-9]+]] -// CHECK-LOCAL-NEXT: [[MUL_I_I_I:%.*]] = mul i32 [[TMP2]], 3 -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I:%.*]] -// CHECK-LOCAL: for.cond.i.i: -// CHECK-LOCAL-NEXT: [[I_0_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] -// CHECK-LOCAL-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I]], 3 -// CHECK-LOCAL-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label 
[[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS3IILM3ENS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEESS__EXIT:%.*]] -// CHECK-LOCAL: for.body.i.i: -// CHECK-LOCAL-NEXT: [[CONV_I_I:%.*]] = zext nneg i32 [[I_0_I_I]] to i64 -// CHECK-LOCAL-NEXT: [[ADD_I_I_I:%.*]] = add i32 [[MUL_I_I_I]], [[I_0_I_I]] -// CHECK-LOCAL-NEXT: [[IDXPROM_I_I:%.*]] = sext i32 [[ADD_I_I_I]] to i64 -// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[P:%.*]], i64 [[IDXPROM_I_I]] -// CHECK-LOCAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(3) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I]] -// CHECK-LOCAL-NEXT: store i32 [[TMP3]], ptr addrspace(4) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-LOCAL-NEXT: [[INC_I_I]] = add nuw nsw i32 [[I_0_I_I]], 1 -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP38:![0-9]+]] +// CHECK-LOCAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul i32 [[TMP2]], 3 +// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] +// CHECK-LOCAL: for.cond.i.i.i: +// CHECK-LOCAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] +// CHECK-LOCAL-NEXT: [[CMP_I_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I_I]], 3 +// CHECK-LOCAL-NEXT: br i1 [[CMP_I_I_I]], label [[FOR_BODY_I_I_I]], label 
[[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS3IILM3ENS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEESS__EXIT:%.*]] +// CHECK-LOCAL: for.body.i.i.i: +// CHECK-LOCAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 +// CHECK-LOCAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[MUL_I_I_I_I]], [[I_0_I_I_I]] +// CHECK-LOCAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 +// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[P:%.*]], i64 [[IDXPROM_I_I_I]] +// CHECK-LOCAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(3) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] +// CHECK-LOCAL-NEXT: store i32 [[TMP3]], ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-LOCAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 +// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP38:![0-9]+]] // CHECK-LOCAL: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS3iiLm3ENS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_SP_NS0_4spanISQ_XT2_EEESS_.exit: // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 
912) #[[ATTR6]] // CHECK-LOCAL-NEXT: ret void @@ -443,22 +443,22 @@ SYCL_EXTERNAL void test_non_power_of_two(sycl::sub_group &sg, plain_ptr p, // CHECK-GLOBAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // CHECK-GLOBAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META42:![0-9]+]] -// CHECK-GLOBAL-NEXT: [[MUL_I_I_I:%.*]] = shl i32 [[TMP2]], 2 -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I:%.*]] -// CHECK-GLOBAL: for.cond.i.i: -// CHECK-GLOBAL-NEXT: [[I_0_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] -// CHECK-GLOBAL-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I]], 4 -// CHECK-GLOBAL-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1IILM4ENS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEESS__EXIT:%.*]] -// CHECK-GLOBAL: for.body.i.i: -// CHECK-GLOBAL-NEXT: [[CONV_I_I:%.*]] = zext nneg i32 [[I_0_I_I]] to i64 -// CHECK-GLOBAL-NEXT: [[ADD_I_I_I:%.*]] = or disjoint i32 [[MUL_I_I_I]], [[I_0_I_I]] -// CHECK-GLOBAL-NEXT: [[IDXPROM_I_I:%.*]] = sext i32 [[ADD_I_I_I]] to i64 -// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[P:%.*]], i64 [[IDXPROM_I_I]] -// CHECK-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds nuw i32, 
ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I]] -// CHECK-GLOBAL-NEXT: store i32 [[TMP3]], ptr addrspace(4) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-GLOBAL-NEXT: [[INC_I_I]] = add nuw nsw i32 [[I_0_I_I]], 1 -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP45:![0-9]+]] +// CHECK-GLOBAL-NEXT: [[MUL_I_I_I_I:%.*]] = shl i32 [[TMP2]], 2 +// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] +// CHECK-GLOBAL: for.cond.i.i.i: +// CHECK-GLOBAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] +// CHECK-GLOBAL-NEXT: [[CMP_I_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I_I]], 4 +// CHECK-GLOBAL-NEXT: br i1 [[CMP_I_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1IILM4ENS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEESS__EXIT:%.*]] +// CHECK-GLOBAL: for.body.i.i.i: +// CHECK-GLOBAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 +// CHECK-GLOBAL-NEXT: [[ADD_I_I_I_I:%.*]] = or disjoint i32 [[MUL_I_I_I_I]], [[I_0_I_I_I]] +// CHECK-GLOBAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 +// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[P:%.*]], i64 [[IDXPROM_I_I_I]] +// CHECK-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] +// CHECK-GLOBAL-NEXT: store i32 [[TMP3]], ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-GLOBAL-NEXT: 
[[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 +// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP45:![0-9]+]] // CHECK-GLOBAL: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiLm4ENS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_SP_NS0_4spanISQ_XT2_EEESS_.exit: // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // CHECK-GLOBAL-NEXT: ret void @@ -469,22 +469,22 @@ SYCL_EXTERNAL void test_non_power_of_two(sycl::sub_group &sg, plain_ptr p, // CHECK-LOCAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR6]] // CHECK-LOCAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META40:![0-9]+]] -// CHECK-LOCAL-NEXT: [[MUL_I_I_I:%.*]] = shl i32 [[TMP2]], 2 -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I:%.*]] -// CHECK-LOCAL: for.cond.i.i: -// CHECK-LOCAL-NEXT: [[I_0_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] -// CHECK-LOCAL-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I]], 4 -// CHECK-LOCAL-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label 
[[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS3IILM4ENS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEESS__EXIT:%.*]] -// CHECK-LOCAL: for.body.i.i: -// CHECK-LOCAL-NEXT: [[CONV_I_I:%.*]] = zext nneg i32 [[I_0_I_I]] to i64 -// CHECK-LOCAL-NEXT: [[ADD_I_I_I:%.*]] = or disjoint i32 [[MUL_I_I_I]], [[I_0_I_I]] -// CHECK-LOCAL-NEXT: [[IDXPROM_I_I:%.*]] = sext i32 [[ADD_I_I_I]] to i64 -// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[P:%.*]], i64 [[IDXPROM_I_I]] -// CHECK-LOCAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(3) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I]] -// CHECK-LOCAL-NEXT: store i32 [[TMP3]], ptr addrspace(4) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-LOCAL-NEXT: [[INC_I_I]] = add nuw nsw i32 [[I_0_I_I]], 1 -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP43:![0-9]+]] +// CHECK-LOCAL-NEXT: [[MUL_I_I_I_I:%.*]] = shl i32 [[TMP2]], 2 +// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] +// CHECK-LOCAL: for.cond.i.i.i: +// CHECK-LOCAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] +// CHECK-LOCAL-NEXT: [[CMP_I_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I_I]], 4 +// CHECK-LOCAL-NEXT: br i1 [[CMP_I_I_I]], label [[FOR_BODY_I_I_I]], label 
[[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS3IILM4ENS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEESS__EXIT:%.*]] +// CHECK-LOCAL: for.body.i.i.i: +// CHECK-LOCAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 +// CHECK-LOCAL-NEXT: [[ADD_I_I_I_I:%.*]] = or disjoint i32 [[MUL_I_I_I_I]], [[I_0_I_I_I]] +// CHECK-LOCAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 +// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[P:%.*]], i64 [[IDXPROM_I_I_I]] +// CHECK-LOCAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(3) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] +// CHECK-LOCAL-NEXT: store i32 [[TMP3]], ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-LOCAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 +// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK-LOCAL: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS3iiLm4ENS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_SP_NS0_4spanISQ_XT2_EEESS_.exit: // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 
noundef 912) #[[ATTR6]] // CHECK-LOCAL-NEXT: ret void @@ -502,22 +502,22 @@ SYCL_EXTERNAL void test_four_ints(sycl::sub_group &sg, plain_ptr p, // CHECK-GLOBAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // CHECK-GLOBAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META47:![0-9]+]] -// CHECK-GLOBAL-NEXT: [[MUL_I_I_I:%.*]] = mul i32 [[TMP2]], 7 -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I:%.*]] -// CHECK-GLOBAL: for.cond.i.i: -// CHECK-GLOBAL-NEXT: [[I_0_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] -// CHECK-GLOBAL-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I]], 7 -// CHECK-GLOBAL-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1IILM7ENS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEESS__EXIT:%.*]] -// CHECK-GLOBAL: for.body.i.i: -// CHECK-GLOBAL-NEXT: [[CONV_I_I:%.*]] = zext nneg i32 [[I_0_I_I]] to i64 -// CHECK-GLOBAL-NEXT: [[ADD_I_I_I:%.*]] = add i32 [[MUL_I_I_I]], [[I_0_I_I]] -// CHECK-GLOBAL-NEXT: [[IDXPROM_I_I:%.*]] = sext i32 [[ADD_I_I_I]] to i64 -// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[P:%.*]], i64 [[IDXPROM_I_I]] -// CHECK-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr 
addrspace(4) [[TMP1]], i64 [[CONV_I_I]] -// CHECK-GLOBAL-NEXT: store i32 [[TMP3]], ptr addrspace(4) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-GLOBAL-NEXT: [[INC_I_I]] = add nuw nsw i32 [[I_0_I_I]], 1 -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP50:![0-9]+]] +// CHECK-GLOBAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul i32 [[TMP2]], 7 +// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] +// CHECK-GLOBAL: for.cond.i.i.i: +// CHECK-GLOBAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] +// CHECK-GLOBAL-NEXT: [[CMP_I_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I_I]], 7 +// CHECK-GLOBAL-NEXT: br i1 [[CMP_I_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1IILM7ENS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEESS__EXIT:%.*]] +// CHECK-GLOBAL: for.body.i.i.i: +// CHECK-GLOBAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 +// CHECK-GLOBAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[MUL_I_I_I_I]], [[I_0_I_I_I]] +// CHECK-GLOBAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 +// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[P:%.*]], i64 [[IDXPROM_I_I_I]] +// CHECK-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] +// CHECK-GLOBAL-NEXT: store i32 [[TMP3]], ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-GLOBAL-NEXT: [[INC_I_I_I]] = 
add nuw nsw i32 [[I_0_I_I_I]], 1 +// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP50:![0-9]+]] // CHECK-GLOBAL: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiLm7ENS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_SP_NS0_4spanISQ_XT2_EEESS_.exit: // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // CHECK-GLOBAL-NEXT: ret void @@ -528,22 +528,22 @@ SYCL_EXTERNAL void test_four_ints(sycl::sub_group &sg, plain_ptr p, // CHECK-LOCAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR6]] // CHECK-LOCAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META45:![0-9]+]] -// CHECK-LOCAL-NEXT: [[MUL_I_I_I:%.*]] = mul i32 [[TMP2]], 7 -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I:%.*]] -// CHECK-LOCAL: for.cond.i.i: -// CHECK-LOCAL-NEXT: [[I_0_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] -// CHECK-LOCAL-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I]], 7 -// CHECK-LOCAL-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label 
[[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS3IILM7ENS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEESS__EXIT:%.*]] -// CHECK-LOCAL: for.body.i.i: -// CHECK-LOCAL-NEXT: [[CONV_I_I:%.*]] = zext nneg i32 [[I_0_I_I]] to i64 -// CHECK-LOCAL-NEXT: [[ADD_I_I_I:%.*]] = add i32 [[MUL_I_I_I]], [[I_0_I_I]] -// CHECK-LOCAL-NEXT: [[IDXPROM_I_I:%.*]] = sext i32 [[ADD_I_I_I]] to i64 -// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[P:%.*]], i64 [[IDXPROM_I_I]] -// CHECK-LOCAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(3) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I]] -// CHECK-LOCAL-NEXT: store i32 [[TMP3]], ptr addrspace(4) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-LOCAL-NEXT: [[INC_I_I]] = add nuw nsw i32 [[I_0_I_I]], 1 -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP48:![0-9]+]] +// CHECK-LOCAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul i32 [[TMP2]], 7 +// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] +// CHECK-LOCAL: for.cond.i.i.i: +// CHECK-LOCAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] +// CHECK-LOCAL-NEXT: [[CMP_I_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I_I]], 7 +// CHECK-LOCAL-NEXT: br i1 [[CMP_I_I_I]], label [[FOR_BODY_I_I_I]], label 
[[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS3IILM7ENS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEESS__EXIT:%.*]] +// CHECK-LOCAL: for.body.i.i.i: +// CHECK-LOCAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 +// CHECK-LOCAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[MUL_I_I_I_I]], [[I_0_I_I_I]] +// CHECK-LOCAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 +// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[P:%.*]], i64 [[IDXPROM_I_I_I]] +// CHECK-LOCAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(3) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] +// CHECK-LOCAL-NEXT: store i32 [[TMP3]], ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-LOCAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 +// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK-LOCAL: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS3iiLm7ENS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_SP_NS0_4spanISQ_XT2_EEESS_.exit: // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 
912) #[[ATTR6]] // CHECK-LOCAL-NEXT: ret void @@ -566,22 +566,22 @@ namespace striped { // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // CHECK-GLOBAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META52:![0-9]+]] // CHECK-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META55:![0-9]+]] -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I:%.*]] -// CHECK-GLOBAL: for.cond.i: -// CHECK-GLOBAL-NEXT: [[I_0_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] -// CHECK-GLOBAL-NEXT: [[CMP_I:%.*]] = icmp samesign ult i32 [[I_0_I]], 2 -// CHECK-GLOBAL-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1IILM2ENS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS9_9NAIVE_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESP_SN_NS0_4SPANISO_XT2_EEESQ__EXIT:%.*]] -// CHECK-GLOBAL: for.body.i: -// CHECK-GLOBAL-NEXT: [[CONV_I:%.*]] = zext nneg i32 [[I_0_I]] to i64 -// CHECK-GLOBAL-NEXT: [[MUL_I_I:%.*]] = mul nuw nsw i32 [[TMP3]], [[I_0_I]] -// CHECK-GLOBAL-NEXT: [[ADD_I_I:%.*]] = add i32 [[TMP2]], [[MUL_I_I]] -// CHECK-GLOBAL-NEXT: [[IDXPROM_I:%.*]] = sext i32 [[ADD_I_I]] to i64 -// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[P:%.*]], i64 [[IDXPROM_I]] -// CHECK-GLOBAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA8]] -// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I]] -// CHECK-GLOBAL-NEXT: 
store i32 [[TMP4]], ptr addrspace(4) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-GLOBAL-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1 -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP58:![0-9]+]] +// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I:%.*]] +// CHECK-GLOBAL: for.cond.i.i: +// CHECK-GLOBAL-NEXT: [[I_0_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] +// CHECK-GLOBAL-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I]], 2 +// CHECK-GLOBAL-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1IILM2ENS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS9_9NAIVE_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESP_SN_NS0_4SPANISO_XT2_EEESQ__EXIT:%.*]] +// CHECK-GLOBAL: for.body.i.i: +// CHECK-GLOBAL-NEXT: [[CONV_I_I:%.*]] = zext nneg i32 [[I_0_I_I]] to i64 +// CHECK-GLOBAL-NEXT: [[MUL_I_I_I:%.*]] = mul nuw nsw i32 [[TMP3]], [[I_0_I_I]] +// CHECK-GLOBAL-NEXT: [[ADD_I_I_I:%.*]] = add i32 [[TMP2]], [[MUL_I_I_I]] +// CHECK-GLOBAL-NEXT: [[IDXPROM_I_I:%.*]] = sext i32 [[ADD_I_I_I]] to i64 +// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[P:%.*]], i64 [[IDXPROM_I_I]] +// CHECK-GLOBAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I]] +// CHECK-GLOBAL-NEXT: store i32 [[TMP4]], ptr addrspace(4) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-GLOBAL-NEXT: [[INC_I_I]] = add nuw nsw i32 [[I_0_I_I]], 1 +// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP58:![0-9]+]] // CHECK-GLOBAL: 
_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiLm2ENS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS9_9naive_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESP_SN_NS0_4spanISO_XT2_EEESQ_.exit: // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // CHECK-GLOBAL-NEXT: ret void @@ -593,22 +593,22 @@ namespace striped { // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR6]] // CHECK-LOCAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META50:![0-9]+]] // CHECK-LOCAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META53:![0-9]+]] -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I:%.*]] -// CHECK-LOCAL: for.cond.i: -// CHECK-LOCAL-NEXT: [[I_0_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] -// CHECK-LOCAL-NEXT: [[CMP_I:%.*]] = icmp samesign ult i32 [[I_0_I]], 2 -// CHECK-LOCAL-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS3IILM2ENS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS9_9NAIVE_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESP_SN_NS0_4SPANISO_XT2_EEESQ__EXIT:%.*]] -// CHECK-LOCAL: for.body.i: -// CHECK-LOCAL-NEXT: [[CONV_I:%.*]] = zext nneg i32 [[I_0_I]] to i64 -// CHECK-LOCAL-NEXT: [[MUL_I_I:%.*]] = 
mul nuw nsw i32 [[TMP3]], [[I_0_I]] -// CHECK-LOCAL-NEXT: [[ADD_I_I:%.*]] = add i32 [[TMP2]], [[MUL_I_I]] -// CHECK-LOCAL-NEXT: [[IDXPROM_I:%.*]] = sext i32 [[ADD_I_I]] to i64 -// CHECK-LOCAL-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[P:%.*]], i64 [[IDXPROM_I]] -// CHECK-LOCAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(3) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA8]] -// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I]] -// CHECK-LOCAL-NEXT: store i32 [[TMP4]], ptr addrspace(4) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-LOCAL-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1 -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP56:![0-9]+]] +// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I:%.*]] +// CHECK-LOCAL: for.cond.i.i: +// CHECK-LOCAL-NEXT: [[I_0_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] +// CHECK-LOCAL-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I]], 2 +// CHECK-LOCAL-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS3IILM2ENS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS9_9NAIVE_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESP_SN_NS0_4SPANISO_XT2_EEESQ__EXIT:%.*]] +// CHECK-LOCAL: for.body.i.i: +// CHECK-LOCAL-NEXT: [[CONV_I_I:%.*]] = zext nneg i32 [[I_0_I_I]] to i64 +// CHECK-LOCAL-NEXT: [[MUL_I_I_I:%.*]] = mul nuw nsw i32 [[TMP3]], [[I_0_I_I]] +// CHECK-LOCAL-NEXT: [[ADD_I_I_I:%.*]] = add i32 [[TMP2]], [[MUL_I_I_I]] +// CHECK-LOCAL-NEXT: [[IDXPROM_I_I:%.*]] = sext i32 [[ADD_I_I_I]] to i64 +// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[P:%.*]], i64 
[[IDXPROM_I_I]] +// CHECK-LOCAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(3) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I]] +// CHECK-LOCAL-NEXT: store i32 [[TMP4]], ptr addrspace(4) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-LOCAL-NEXT: [[INC_I_I]] = add nuw nsw i32 [[I_0_I_I]], 1 +// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP56:![0-9]+]] // CHECK-LOCAL: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS3iiLm2ENS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS9_9naive_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESP_SN_NS0_4spanISO_XT2_EEESQ_.exit: // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR6]] // CHECK-LOCAL-NEXT: ret void @@ -623,48 +623,48 @@ SYCL_EXTERNAL void test_naive(sycl::sub_group &sg, plain_ptr p, // CHECK-GLOBAL-NEXT: entry: // CHECK-GLOBAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT:%.*]], align 8, !tbaa [[TBAA15]] // CHECK-GLOBAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) -// CHECK-GLOBAL-NEXT: [[CMP_I_I:%.*]] = icmp ne ptr addrspace(1) [[P:%.*]], null -// CHECK-GLOBAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I]]) -// CHECK-GLOBAL-NEXT: [[CALL5_I:%.*]] = tail call spir_func noundef <2 x i32> @_Z30__spirv_SubgroupBlockReadINTELIDv2_jET_PU3AS1Kj(ptr addrspace(1) noundef nonnull [[P]]) #[[ATTR7]] -// CHECK-GLOBAL-NEXT: store <2 x i32> [[CALL5_I]], ptr addrspace(4) [[TMP1]], align 4 +// CHECK-GLOBAL-NEXT: [[CMP_I_I_I:%.*]] = icmp ne ptr addrspace(1) [[P:%.*]], null +// CHECK-GLOBAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I_I]]) +// CHECK-GLOBAL-NEXT: 
[[CALL6_I_I:%.*]] = tail call spir_func noundef <2 x i32> @_Z30__spirv_SubgroupBlockReadINTELIDv2_jET_PU3AS1Kj(ptr addrspace(1) noundef nonnull [[P]]) #[[ATTR7]] +// CHECK-GLOBAL-NEXT: store <2 x i32> [[CALL6_I_I]], ptr addrspace(4) [[TMP1]], align 4 // CHECK-GLOBAL-NEXT: ret void // // CHECK-LOCAL-LABEL: @_ZN7striped14test_optimizedERN4sycl3_V19sub_groupEPU3AS3iNS1_4spanIiLm2EEE( // CHECK-LOCAL-NEXT: entry: // CHECK-LOCAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT:%.*]], align 8, !tbaa [[TBAA15]] // CHECK-LOCAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) -// CHECK-LOCAL-NEXT: [[CMP_I_I:%.*]] = icmp ne ptr addrspace(3) [[P:%.*]], null -// CHECK-LOCAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I]]) +// CHECK-LOCAL-NEXT: [[CMP_I_I_I:%.*]] = icmp ne ptr addrspace(3) [[P:%.*]], null +// CHECK-LOCAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I_I]]) // CHECK-LOCAL-NEXT: [[TMP2:%.*]] = ptrtoint ptr addrspace(3) [[P]] to i64 -// CHECK-LOCAL-NEXT: [[REM_I_I:%.*]] = and i64 [[TMP2]], 15 -// CHECK-LOCAL-NEXT: [[CMP_I20_I:%.*]] = icmp eq i64 [[REM_I_I]], 0 -// CHECK-LOCAL-NEXT: br i1 [[CMP_I20_I]], label [[IF_END_I:%.*]], label [[IF_THEN_I:%.*]] -// CHECK-LOCAL: if.then.i: +// CHECK-LOCAL-NEXT: [[REM_I_I_I:%.*]] = and i64 [[TMP2]], 15 +// CHECK-LOCAL-NEXT: [[CMP_I15_I_I:%.*]] = icmp eq i64 [[REM_I_I_I]], 0 +// CHECK-LOCAL-NEXT: br i1 [[CMP_I15_I_I]], label [[IF_END_I_I:%.*]], label [[IF_THEN_I_I:%.*]] +// CHECK-LOCAL: if.then.i.i: // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR6]] // CHECK-LOCAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META58:![0-9]+]] // CHECK-LOCAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META61:![0-9]+]] -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I:%.*]] -// CHECK-LOCAL: for.cond.i.i: -// 
CHECK-LOCAL-NEXT: [[I_0_I_I:%.*]] = phi i32 [ 0, [[IF_THEN_I]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] -// CHECK-LOCAL-NEXT: [[CMP_I24_I:%.*]] = icmp samesign ult i32 [[I_0_I_I]], 2 -// CHECK-LOCAL-NEXT: br i1 [[CMP_I24_I]], label [[FOR_BODY_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS3IILM2ENS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_9NAIVE_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEEST_SR_NS0_4SPANISS_XT2_EEESU__EXIT_I:%.*]] -// CHECK-LOCAL: for.body.i.i: -// CHECK-LOCAL-NEXT: [[CONV_I_I:%.*]] = zext nneg i32 [[I_0_I_I]] to i64 -// CHECK-LOCAL-NEXT: [[MUL_I_I_I:%.*]] = mul nuw nsw i32 [[TMP4]], [[I_0_I_I]] -// CHECK-LOCAL-NEXT: [[ADD_I_I_I:%.*]] = add i32 [[TMP3]], [[MUL_I_I_I]] -// CHECK-LOCAL-NEXT: [[IDXPROM_I_I:%.*]] = sext i32 [[ADD_I_I_I]] to i64 -// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[P]], i64 [[IDXPROM_I_I]] -// CHECK-LOCAL-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(3) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I]] -// CHECK-LOCAL-NEXT: store i32 [[TMP5]], ptr addrspace(4) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-LOCAL-NEXT: [[INC_I_I]] = add nuw nsw i32 [[I_0_I_I]], 1 -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP64:![0-9]+]] -// CHECK-LOCAL: 
_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS3iiLm2ENS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_9naive_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeEST_SR_NS0_4spanISS_XT2_EEESU_.exit.i: +// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] +// CHECK-LOCAL: for.cond.i.i.i: +// CHECK-LOCAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[IF_THEN_I_I]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] +// CHECK-LOCAL-NEXT: [[CMP_I19_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I_I]], 2 +// CHECK-LOCAL-NEXT: br i1 [[CMP_I19_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL6DETAIL15GROUP_LOAD_IMPLINS0_9SUB_GROUPEPU3AS3IILM2ENS3_10PROPERTIESINS4_20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS4_9NAIVE_KEYEJEEENSB_INS4_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEEST_SR_NS0_4SPANISS_XT2_EEESU__EXIT_I_I:%.*]] +// CHECK-LOCAL: for.body.i.i.i: +// CHECK-LOCAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 +// CHECK-LOCAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul nuw nsw i32 [[TMP4]], [[I_0_I_I_I]] +// CHECK-LOCAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[TMP3]], [[MUL_I_I_I_I]] +// CHECK-LOCAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 +// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[P]], i64 [[IDXPROM_I_I_I]] +// CHECK-LOCAL-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(3) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-LOCAL-NEXT: 
[[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] +// CHECK-LOCAL-NEXT: store i32 [[TMP5]], ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-LOCAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 +// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP64:![0-9]+]] +// CHECK-LOCAL: _ZN4sycl3_V13ext6oneapi12experimental6detail15group_load_implINS0_9sub_groupEPU3AS3iiLm2ENS3_10propertiesINS4_20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS4_9naive_keyEJEEENSB_INS4_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeEST_SR_NS0_4spanISS_XT2_EEESU_.exit.i.i: // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR6]] // CHECK-LOCAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS3IILM2ENS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEESS__EXIT:%.*]] -// CHECK-LOCAL: if.end.i: -// CHECK-LOCAL-NEXT: [[CALL5_I:%.*]] = tail call spir_func noundef <2 x i32> @_Z30__spirv_SubgroupBlockReadINTELIDv2_jET_PU3AS3Kj(ptr addrspace(3) noundef nonnull [[P]]) #[[ATTR6]] -// CHECK-LOCAL-NEXT: store <2 x i32> [[CALL5_I]], ptr addrspace(4) [[TMP1]], align 4 +// CHECK-LOCAL: if.end.i.i: +// CHECK-LOCAL-NEXT: [[CALL6_I_I:%.*]] = tail call spir_func noundef <2 x i32> 
@_Z30__spirv_SubgroupBlockReadINTELIDv2_jET_PU3AS3Kj(ptr addrspace(3) noundef nonnull [[P]]) #[[ATTR6]] +// CHECK-LOCAL-NEXT: store <2 x i32> [[CALL6_I_I]], ptr addrspace(4) [[TMP1]], align 4 // CHECK-LOCAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS3IILM2ENS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEESS__EXIT]] // CHECK-LOCAL: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS3iiLm2ENS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_SP_NS0_4spanISQ_XT2_EEESS_.exit: // CHECK-LOCAL-NEXT: ret void @@ -679,8 +679,8 @@ SYCL_EXTERNAL void test_optimized(sycl::sub_group &sg, plain_ptr p, // CHECK-GLOBAL-NEXT: entry: // CHECK-GLOBAL-NEXT: [[CMP_I_I_I:%.*]] = icmp ne ptr addrspace(1) [[P:%.*]], null // CHECK-GLOBAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I_I]]) -// CHECK-GLOBAL-NEXT: [[CALL5_I_I:%.*]] = tail call spir_func noundef i32 @_Z30__spirv_SubgroupBlockReadINTELIjET_PU3AS1Kj(ptr addrspace(1) noundef nonnull [[P]]) #[[ATTR7]] -// CHECK-GLOBAL-NEXT: store i32 [[CALL5_I_I]], ptr addrspace(4) [[OUT:%.*]], align 4 +// CHECK-GLOBAL-NEXT: [[CALL6_I_I:%.*]] = tail call spir_func noundef i32 @_Z30__spirv_SubgroupBlockReadINTELIjET_PU3AS1Kj(ptr addrspace(1) noundef nonnull [[P]]) #[[ATTR7]] +// CHECK-GLOBAL-NEXT: store i32 [[CALL6_I_I]], ptr 
addrspace(4) [[OUT:%.*]], align 4 // CHECK-GLOBAL-NEXT: ret void // // CHECK-LOCAL-LABEL: @_ZN7striped27test_contiguous_auto_detectERN4sycl3_V19sub_groupEPU3AS3iRi( @@ -689,8 +689,8 @@ SYCL_EXTERNAL void test_optimized(sycl::sub_group &sg, plain_ptr p, // CHECK-LOCAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I_I]]) // CHECK-LOCAL-NEXT: [[TMP0:%.*]] = ptrtoint ptr addrspace(3) [[P]] to i64 // CHECK-LOCAL-NEXT: [[REM_I_I_I:%.*]] = and i64 [[TMP0]], 15 -// CHECK-LOCAL-NEXT: [[CMP_I20_I_I:%.*]] = icmp eq i64 [[REM_I_I_I]], 0 -// CHECK-LOCAL-NEXT: br i1 [[CMP_I20_I_I]], label [[IF_END_I_I:%.*]], label [[IF_THEN_I_I:%.*]] +// CHECK-LOCAL-NEXT: [[CMP_I15_I_I:%.*]] = icmp eq i64 [[REM_I_I_I]], 0 +// CHECK-LOCAL-NEXT: br i1 [[CMP_I15_I_I]], label [[IF_END_I_I:%.*]], label [[IF_THEN_I_I:%.*]] // CHECK-LOCAL: if.then.i.i: // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR6]] // CHECK-LOCAL-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4 @@ -701,8 +701,8 @@ SYCL_EXTERNAL void test_optimized(sycl::sub_group &sg, plain_ptr p, // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR6]] // CHECK-LOCAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS3IINS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT2_EEVE4TYPEESP_SN_RSO_SQ__EXIT:%.*]] // CHECK-LOCAL: if.end.i.i: -// CHECK-LOCAL-NEXT: [[CALL5_I_I:%.*]] = tail call spir_func noundef i32 @_Z30__spirv_SubgroupBlockReadINTELIjET_PU3AS3Kj(ptr addrspace(3) noundef nonnull [[P]]) #[[ATTR6]] -// CHECK-LOCAL-NEXT: store i32 
[[CALL5_I_I]], ptr addrspace(4) [[OUT]], align 4 +// CHECK-LOCAL-NEXT: [[CALL6_I_I:%.*]] = tail call spir_func noundef i32 @_Z30__spirv_SubgroupBlockReadINTELIjET_PU3AS3Kj(ptr addrspace(3) noundef nonnull [[P]]) #[[ATTR6]] +// CHECK-LOCAL-NEXT: store i32 [[CALL6_I_I]], ptr addrspace(4) [[OUT]], align 4 // CHECK-LOCAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS3IINS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT2_EEVE4TYPEESP_SN_RSO_SQ__EXIT]] // CHECK-LOCAL: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS3iiNS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT2_EEvE4typeESP_SN_RSO_SQ_.exit: // CHECK-LOCAL-NEXT: ret void @@ -731,35 +731,41 @@ using accessor_iter_t = local_accessor::iterator; // CHECK-GLOBAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META62:![0-9]+]] // CHECK-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META65:![0-9]+]] // CHECK-GLOBAL-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr addrspace(4) [[AGG_TMP1_SROA_0_0_COPYLOAD]], i64 [[AGG_TMP1_SROA_2_0_COPYLOAD]] -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I:%.*]] -// CHECK-GLOBAL: for.cond.i.i: -// CHECK-GLOBAL-NEXT: [[I_0_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] -// CHECK-GLOBAL-NEXT: [[CMP_I_I:%.*]] = icmp 
samesign ult i32 [[I_0_I_I]], 2 -// CHECK-GLOBAL-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPENS0_6DETAIL17ACCESSOR_ITERATORIKILI1EEEILM2ENS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSD_INS3_14FULL_GROUP_KEYEJEEENSD_INSB_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEESS__EXIT:%.*]] -// CHECK-GLOBAL: for.body.i.i: -// CHECK-GLOBAL-NEXT: [[CONV_I_I:%.*]] = zext nneg i32 [[I_0_I_I]] to i64 -// CHECK-GLOBAL-NEXT: [[MUL_I_I_I:%.*]] = mul nuw nsw i32 [[TMP3]], [[I_0_I_I]] -// CHECK-GLOBAL-NEXT: [[ADD_I_I_I:%.*]] = add i32 [[TMP2]], [[MUL_I_I_I]] -// CHECK-GLOBAL-NEXT: [[CONV3_I_I:%.*]] = sext i32 [[ADD_I_I_I]] to i64 -// CHECK-GLOBAL-NEXT: [[ADD_PTR_I_I_I_I:%.*]] = getelementptr i32, ptr addrspace(4) [[TMP4]], i64 [[CONV3_I_I]] -// CHECK-GLOBAL-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(4) [[ADD_PTR_I_I_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I]] -// CHECK-GLOBAL-NEXT: store i32 [[TMP5]], ptr addrspace(4) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-GLOBAL-NEXT: [[INC_I_I]] = add nuw nsw i32 [[I_0_I_I]], 1 -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP68:![0-9]+]] +// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] +// CHECK-GLOBAL: for.cond.i.i.i: +// CHECK-GLOBAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] +// CHECK-GLOBAL-NEXT: [[CMP_I_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I_I]], 2 +// CHECK-GLOBAL-NEXT: br i1 [[CMP_I_I_I]], label [[FOR_BODY_I_I_I]], label 
[[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPENS0_6DETAIL17ACCESSOR_ITERATORIKILI1EEEILM2ENS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSD_INS3_14FULL_GROUP_KEYEJEEENSD_INSB_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEESS__EXIT:%.*]] +// CHECK-GLOBAL: for.body.i.i.i: +// CHECK-GLOBAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 +// CHECK-GLOBAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul nuw nsw i32 [[TMP3]], [[I_0_I_I_I]] +// CHECK-GLOBAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[TMP2]], [[MUL_I_I_I_I]] +// CHECK-GLOBAL-NEXT: [[CONV3_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 +// CHECK-GLOBAL-NEXT: [[ADD_PTR_I_I_I_I_I:%.*]] = getelementptr i32, ptr addrspace(4) [[TMP4]], i64 [[CONV3_I_I_I]] +// CHECK-GLOBAL-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(4) [[ADD_PTR_I_I_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] +// CHECK-GLOBAL-NEXT: store i32 [[TMP5]], ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-GLOBAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 +// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP68:![0-9]+]] // CHECK-GLOBAL: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupENS0_6detail17accessor_iteratorIKiLi1EEEiLm2ENS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSD_INS3_14full_group_keyEJEEENSD_INSB_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_SP_NS0_4spanISQ_XT2_EEESS_.exit: // CHECK-GLOBAL-NEXT: tail call spir_func void 
@_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // CHECK-GLOBAL-NEXT: ret void // // CHECK-LOCAL-LABEL: @_ZN7striped18test_accessor_iterERN4sycl3_V19sub_groupERPiNS1_4spanIiLm2EEE( // CHECK-LOCAL-NEXT: entry: -// CHECK-LOCAL-NEXT: [[AGG_TMP:%.*]] = alloca %"struct.sycl::_V1::sub_group", align 1 -// CHECK-LOCAL-NEXT: [[AGG_TMP1:%.*]] = alloca %"class.sycl::_V1::span.22", align 8 -// CHECK-LOCAL-NEXT: [[AGG_TMP2:%.*]] = alloca %"class.sycl::_V1::ext::oneapi::experimental::properties.30", align 1 +// CHECK-LOCAL-NEXT: [[AGG_TMP_I:%.*]] = alloca %"struct.sycl::_V1::sub_group", align 1 +// CHECK-LOCAL-NEXT: [[AGG_TMP1_I:%.*]] = alloca %"class.sycl::_V1::span.22", align 8 +// CHECK-LOCAL-NEXT: [[AGG_TMP2_I:%.*]] = alloca %"class.sycl::_V1::ext::oneapi::experimental::properties.30", align 1 // CHECK-LOCAL-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[ITER:%.*]], align 8, !tbaa [[TBAA15]] // CHECK-LOCAL-NEXT: [[TMP1:%.*]] = load i64, ptr [[OUT:%.*]], align 8, !tbaa [[TBAA15]] -// CHECK-LOCAL-NEXT: store i64 [[TMP1]], ptr [[AGG_TMP1]], align 8, !tbaa [[TBAA15]] -// CHECK-LOCAL-NEXT: tail call spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPiiLm2ENS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSA_INS3_14full_group_keyEJEEENSA_INS8_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESO_SM_NS0_4spanISN_XT2_EEESP_(ptr noundef nonnull byval(%"struct.sycl::_V1::sub_group") align 1 [[AGG_TMP]], ptr addrspace(4) noundef [[TMP0]], ptr noundef nonnull byval(%"class.sycl::_V1::span.22") align 8 [[AGG_TMP1]], ptr noundef nonnull byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.30") align 1 [[AGG_TMP2]]) #[[ATTR6]] +// CHECK-LOCAL-NEXT: call void @llvm.lifetime.start.p0(i64 1, ptr nonnull 
[[AGG_TMP_I]]) +// CHECK-LOCAL-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[AGG_TMP1_I]]) +// CHECK-LOCAL-NEXT: call void @llvm.lifetime.start.p0(i64 3, ptr nonnull [[AGG_TMP2_I]]) +// CHECK-LOCAL-NEXT: store i64 [[TMP1]], ptr [[AGG_TMP1_I]], align 8, !tbaa [[TBAA15]] +// CHECK-LOCAL-NEXT: tail call spir_func void @_ZN4sycl3_V13ext6oneapi12experimental6detail15group_load_implINS0_9sub_groupEPiiLm2ENS3_10propertiesINS4_20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSA_INS3_14full_group_keyEJEEENSA_INS4_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESO_SM_NS0_4spanISN_XT2_EEESP_(ptr noundef nonnull byval(%"struct.sycl::_V1::sub_group") align 1 [[AGG_TMP_I]], ptr addrspace(4) noundef [[TMP0]], ptr noundef nonnull byval(%"class.sycl::_V1::span.22") align 8 [[AGG_TMP1_I]], ptr noundef nonnull byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.30") align 1 [[AGG_TMP2_I]]) #[[ATTR6]] +// CHECK-LOCAL-NEXT: call void @llvm.lifetime.end.p0(i64 1, ptr nonnull [[AGG_TMP_I]]) +// CHECK-LOCAL-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[AGG_TMP1_I]]) +// CHECK-LOCAL-NEXT: call void @llvm.lifetime.end.p0(i64 3, ptr nonnull [[AGG_TMP2_I]]) // CHECK-LOCAL-NEXT: ret void // SYCL_EXTERNAL void test_accessor_iter(sycl::sub_group &sg, @@ -770,25 +776,39 @@ SYCL_EXTERNAL void test_accessor_iter(sycl::sub_group &sg, // CHECK-GLOBAL-LABEL: @_ZN7striped34test_accessor_iter_force_optimizedERN4sycl3_V19sub_groupERNS1_6detail17accessor_iteratorIKiLi1EEENS1_4spanIiLm2EEE( // CHECK-GLOBAL-NEXT: entry: -// CHECK-GLOBAL-NEXT: [[AGG_TMP:%.*]] = alloca %"struct.sycl::_V1::sub_group", align 1 -// CHECK-GLOBAL-NEXT: [[AGG_TMP1:%.*]] = alloca %"class.sycl::_V1::detail::accessor_iterator", align 8 -// CHECK-GLOBAL-NEXT: [[AGG_TMP2:%.*]] = alloca %"class.sycl::_V1::span.22", align 8 -// 
CHECK-GLOBAL-NEXT: [[AGG_TMP3:%.*]] = alloca %"class.sycl::_V1::ext::oneapi::experimental::properties.28", align 1 -// CHECK-GLOBAL-NEXT: call void @llvm.memcpy.p0.p4.i64(ptr align 8 [[AGG_TMP1]], ptr addrspace(4) align 8 [[ITER:%.*]], i64 80, i1 false), !tbaa.struct [[TBAA_STRUCT70:![0-9]+]] +// CHECK-GLOBAL-NEXT: [[AGG_TMP_I:%.*]] = alloca %"struct.sycl::_V1::sub_group", align 1 +// CHECK-GLOBAL-NEXT: [[AGG_TMP2_I:%.*]] = alloca %"class.sycl::_V1::span.22", align 8 +// CHECK-GLOBAL-NEXT: [[AGG_TMP3_I:%.*]] = alloca %"class.sycl::_V1::ext::oneapi::experimental::properties.28", align 1 +// CHECK-GLOBAL-NEXT: [[AGG_TMP15:%.*]] = alloca %"class.sycl::_V1::detail::accessor_iterator", align 8 // CHECK-GLOBAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT:%.*]], align 8, !tbaa [[TBAA15]] -// CHECK-GLOBAL-NEXT: store i64 [[TMP0]], ptr [[AGG_TMP2]], align 8, !tbaa [[TBAA15]] -// CHECK-GLOBAL-NEXT: tail call spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupENS0_6detail17accessor_iteratorIKiLi1EEEiLm2ENS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSD_INS3_21contiguous_memory_keyEJEEENSD_INS3_14full_group_keyEJEEENSD_INSB_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeEST_SR_NS0_4spanISS_XT2_EEESU_(ptr noundef nonnull byval(%"struct.sycl::_V1::sub_group") align 1 [[AGG_TMP]], ptr noundef nonnull byval(%"class.sycl::_V1::detail::accessor_iterator") align 8 [[AGG_TMP1]], ptr noundef nonnull byval(%"class.sycl::_V1::span.22") align 8 [[AGG_TMP2]], ptr noundef nonnull byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.28") align 1 [[AGG_TMP3]]) #[[ATTR7]] +// CHECK-GLOBAL-NEXT: call void @llvm.lifetime.start.p0(i64 80, ptr nonnull [[AGG_TMP15]]) +// CHECK-GLOBAL-NEXT: call void @llvm.lifetime.start.p0(i64 1, ptr nonnull [[AGG_TMP_I]]) +// 
CHECK-GLOBAL-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[AGG_TMP2_I]]) +// CHECK-GLOBAL-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[AGG_TMP3_I]]) +// CHECK-GLOBAL-NEXT: call void @llvm.memcpy.p0.p4.i64(ptr noundef nonnull align 8 dereferenceable(80) [[AGG_TMP15]], ptr addrspace(4) noundef align 8 dereferenceable(80) [[ITER:%.*]], i64 80, i1 false) +// CHECK-GLOBAL-NEXT: store i64 [[TMP0]], ptr [[AGG_TMP2_I]], align 8, !tbaa [[TBAA15]] +// CHECK-GLOBAL-NEXT: tail call spir_func void @_ZN4sycl3_V13ext6oneapi12experimental6detail15group_load_implINS0_9sub_groupENS0_6detail17accessor_iteratorIKiLi1EEEiLm2ENS3_10propertiesINS4_20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSD_INS3_21contiguous_memory_keyEJEEENSD_INS3_14full_group_keyEJEEENSD_INS4_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeEST_SR_NS0_4spanISS_XT2_EEESU_(ptr noundef nonnull byval(%"struct.sycl::_V1::sub_group") align 1 [[AGG_TMP_I]], ptr noundef nonnull byval(%"class.sycl::_V1::detail::accessor_iterator") align 8 [[AGG_TMP15]], ptr noundef nonnull byval(%"class.sycl::_V1::span.22") align 8 [[AGG_TMP2_I]], ptr noundef nonnull byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.28") align 1 [[AGG_TMP3_I]]) #[[ATTR7]] +// CHECK-GLOBAL-NEXT: call void @llvm.lifetime.end.p0(i64 80, ptr nonnull [[AGG_TMP15]]) +// CHECK-GLOBAL-NEXT: call void @llvm.lifetime.end.p0(i64 1, ptr nonnull [[AGG_TMP_I]]) +// CHECK-GLOBAL-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[AGG_TMP2_I]]) +// CHECK-GLOBAL-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[AGG_TMP3_I]]) // CHECK-GLOBAL-NEXT: ret void // // CHECK-LOCAL-LABEL: @_ZN7striped34test_accessor_iter_force_optimizedERN4sycl3_V19sub_groupERPiNS1_4spanIiLm2EEE( // CHECK-LOCAL-NEXT: entry: -// CHECK-LOCAL-NEXT: [[AGG_TMP:%.*]] = alloca 
%"struct.sycl::_V1::sub_group", align 1 -// CHECK-LOCAL-NEXT: [[AGG_TMP1:%.*]] = alloca %"class.sycl::_V1::span.22", align 8 -// CHECK-LOCAL-NEXT: [[AGG_TMP2:%.*]] = alloca %"class.sycl::_V1::ext::oneapi::experimental::properties.28", align 1 +// CHECK-LOCAL-NEXT: [[AGG_TMP_I:%.*]] = alloca %"struct.sycl::_V1::sub_group", align 1 +// CHECK-LOCAL-NEXT: [[AGG_TMP1_I:%.*]] = alloca %"class.sycl::_V1::span.22", align 8 +// CHECK-LOCAL-NEXT: [[AGG_TMP2_I:%.*]] = alloca %"class.sycl::_V1::ext::oneapi::experimental::properties.28", align 1 // CHECK-LOCAL-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[ITER:%.*]], align 8, !tbaa [[TBAA15]] // CHECK-LOCAL-NEXT: [[TMP1:%.*]] = load i64, ptr [[OUT:%.*]], align 8, !tbaa [[TBAA15]] -// CHECK-LOCAL-NEXT: store i64 [[TMP1]], ptr [[AGG_TMP1]], align 8, !tbaa [[TBAA15]] -// CHECK-LOCAL-NEXT: tail call spir_func void @_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPiiLm2ENS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEENSA_INS8_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESQ_SO_NS0_4spanISP_XT2_EEESR_(ptr noundef nonnull byval(%"struct.sycl::_V1::sub_group") align 1 [[AGG_TMP]], ptr addrspace(4) noundef [[TMP0]], ptr noundef nonnull byval(%"class.sycl::_V1::span.22") align 8 [[AGG_TMP1]], ptr noundef nonnull byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.28") align 1 [[AGG_TMP2]]) #[[ATTR6]] +// CHECK-LOCAL-NEXT: call void @llvm.lifetime.start.p0(i64 1, ptr nonnull [[AGG_TMP_I]]) +// CHECK-LOCAL-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[AGG_TMP1_I]]) +// CHECK-LOCAL-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[AGG_TMP2_I]]) +// CHECK-LOCAL-NEXT: store i64 [[TMP1]], ptr [[AGG_TMP1_I]], 
align 8, !tbaa [[TBAA15]] +// CHECK-LOCAL-NEXT: tail call spir_func void @_ZN4sycl3_V13ext6oneapi12experimental6detail15group_load_implINS0_9sub_groupEPiiLm2ENS3_10propertiesINS4_20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSA_INS3_21contiguous_memory_keyEJEEENSA_INS3_14full_group_keyEJEEENSA_INS4_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESQ_SO_NS0_4spanISP_XT2_EEESR_(ptr noundef nonnull byval(%"struct.sycl::_V1::sub_group") align 1 [[AGG_TMP_I]], ptr addrspace(4) noundef [[TMP0]], ptr noundef nonnull byval(%"class.sycl::_V1::span.22") align 8 [[AGG_TMP1_I]], ptr noundef nonnull byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.28") align 1 [[AGG_TMP2_I]]) #[[ATTR6]] +// CHECK-LOCAL-NEXT: call void @llvm.lifetime.end.p0(i64 1, ptr nonnull [[AGG_TMP_I]]) +// CHECK-LOCAL-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[AGG_TMP1_I]]) +// CHECK-LOCAL-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[AGG_TMP2_I]]) // CHECK-LOCAL-NEXT: ret void // SYCL_EXTERNAL void test_accessor_iter_force_optimized(sycl::sub_group &sg, @@ -800,40 +820,40 @@ SYCL_EXTERNAL void test_accessor_iter_force_optimized(sycl::sub_group &sg, // CHECK-GLOBAL-LABEL: @_ZN7striped24test_runtime_align_checkERN4sycl3_V19sub_groupEPU3AS1cNS1_4spanIcLm2EEE( // CHECK-GLOBAL-NEXT: entry: -// CHECK-GLOBAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT:%.*]], align 8, !tbaa [[TBAA90:![0-9]+]] +// CHECK-GLOBAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT:%.*]], align 8, !tbaa [[TBAA87:![0-9]+]] // CHECK-GLOBAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) -// CHECK-GLOBAL-NEXT: [[CMP_I_I:%.*]] = icmp ne ptr addrspace(1) [[P:%.*]], null -// CHECK-GLOBAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I]]) +// CHECK-GLOBAL-NEXT: [[CMP_I_I_I:%.*]] = icmp ne ptr addrspace(1) [[P:%.*]], null +// 
CHECK-GLOBAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I_I]]) // CHECK-GLOBAL-NEXT: [[TMP2:%.*]] = ptrtoint ptr addrspace(1) [[P]] to i64 -// CHECK-GLOBAL-NEXT: [[REM_I_I:%.*]] = and i64 [[TMP2]], 3 -// CHECK-GLOBAL-NEXT: [[CMP_I20_I:%.*]] = icmp eq i64 [[REM_I_I]], 0 -// CHECK-GLOBAL-NEXT: br i1 [[CMP_I20_I]], label [[IF_END_I:%.*]], label [[IF_THEN_I:%.*]] -// CHECK-GLOBAL: if.then.i: +// CHECK-GLOBAL-NEXT: [[REM_I_I_I:%.*]] = and i64 [[TMP2]], 3 +// CHECK-GLOBAL-NEXT: [[CMP_I15_I_I:%.*]] = icmp eq i64 [[REM_I_I_I]], 0 +// CHECK-GLOBAL-NEXT: br i1 [[CMP_I15_I_I]], label [[IF_END_I_I:%.*]], label [[IF_THEN_I_I:%.*]] +// CHECK-GLOBAL: if.then.i.i: // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] -// CHECK-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META92:![0-9]+]] -// CHECK-GLOBAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META95:![0-9]+]] -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I:%.*]] -// CHECK-GLOBAL: for.cond.i.i: -// CHECK-GLOBAL-NEXT: [[I_0_I_I:%.*]] = phi i32 [ 0, [[IF_THEN_I]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] -// CHECK-GLOBAL-NEXT: [[CMP_I24_I:%.*]] = icmp samesign ult i32 [[I_0_I_I]], 2 -// CHECK-GLOBAL-NEXT: br i1 [[CMP_I24_I]], label [[FOR_BODY_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1CCLM2ENS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_9NAIVE_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEEST_SR_NS0_4SPANISS_XT2_EEESU__EXIT_I:%.*]] -// CHECK-GLOBAL: 
for.body.i.i: -// CHECK-GLOBAL-NEXT: [[CONV_I_I:%.*]] = zext nneg i32 [[I_0_I_I]] to i64 -// CHECK-GLOBAL-NEXT: [[MUL_I_I_I:%.*]] = mul nuw nsw i32 [[TMP4]], [[I_0_I_I]] -// CHECK-GLOBAL-NEXT: [[ADD_I_I_I:%.*]] = add i32 [[TMP3]], [[MUL_I_I_I]] -// CHECK-GLOBAL-NEXT: [[IDXPROM_I_I:%.*]] = sext i32 [[ADD_I_I_I]] to i64 -// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[P]], i64 [[IDXPROM_I_I]] -// CHECK-GLOBAL-NEXT: [[TMP5:%.*]] = load i8, ptr addrspace(1) [[ARRAYIDX_I_I]], align 1, !tbaa [[TBAA25]] -// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I]] -// CHECK-GLOBAL-NEXT: store i8 [[TMP5]], ptr addrspace(4) [[ARRAYIDX_I_I_I]], align 1, !tbaa [[TBAA25]] -// CHECK-GLOBAL-NEXT: [[INC_I_I]] = add nuw nsw i32 [[I_0_I_I]], 1 -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP98:![0-9]+]] -// CHECK-GLOBAL: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1ccLm2ENS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_9naive_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeEST_SR_NS0_4spanISS_XT2_EEESU_.exit.i: +// CHECK-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META89:![0-9]+]] +// CHECK-GLOBAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META92:![0-9]+]] +// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] +// CHECK-GLOBAL: for.cond.i.i.i: +// CHECK-GLOBAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[IF_THEN_I_I]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] +// CHECK-GLOBAL-NEXT: 
[[CMP_I19_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I_I]], 2 +// CHECK-GLOBAL-NEXT: br i1 [[CMP_I19_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL6DETAIL15GROUP_LOAD_IMPLINS0_9SUB_GROUPEPU3AS1CCLM2ENS3_10PROPERTIESINS4_20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS4_9NAIVE_KEYEJEEENSB_INS4_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEEST_SR_NS0_4SPANISS_XT2_EEESU__EXIT_I_I:%.*]] +// CHECK-GLOBAL: for.body.i.i.i: +// CHECK-GLOBAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 +// CHECK-GLOBAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul nuw nsw i32 [[TMP4]], [[I_0_I_I_I]] +// CHECK-GLOBAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[TMP3]], [[MUL_I_I_I_I]] +// CHECK-GLOBAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 +// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[P]], i64 [[IDXPROM_I_I_I]] +// CHECK-GLOBAL-NEXT: [[TMP5:%.*]] = load i8, ptr addrspace(1) [[ARRAYIDX_I_I_I]], align 1, !tbaa [[TBAA25]] +// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] +// CHECK-GLOBAL-NEXT: store i8 [[TMP5]], ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 1, !tbaa [[TBAA25]] +// CHECK-GLOBAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 +// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP95:![0-9]+]] +// CHECK-GLOBAL: 
_ZN4sycl3_V13ext6oneapi12experimental6detail15group_load_implINS0_9sub_groupEPU3AS1ccLm2ENS3_10propertiesINS4_20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS4_9naive_keyEJEEENSB_INS4_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeEST_SR_NS0_4spanISS_XT2_EEESU_.exit.i.i: // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // CHECK-GLOBAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1CCLM2ENS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEESS__EXIT:%.*]] -// CHECK-GLOBAL: if.end.i: -// CHECK-GLOBAL-NEXT: [[CALL5_I:%.*]] = tail call spir_func noundef <2 x i8> @_Z30__spirv_SubgroupBlockReadINTELIDv2_hET_PU3AS1Kh(ptr addrspace(1) noundef nonnull [[P]]) #[[ATTR7]] -// CHECK-GLOBAL-NEXT: store <2 x i8> [[CALL5_I]], ptr addrspace(4) [[TMP1]], align 1 +// CHECK-GLOBAL: if.end.i.i: +// CHECK-GLOBAL-NEXT: [[CALL6_I_I:%.*]] = tail call spir_func noundef <2 x i8> @_Z30__spirv_SubgroupBlockReadINTELIDv2_hET_PU3AS1Kh(ptr addrspace(1) noundef nonnull [[P]]) #[[ATTR7]] +// CHECK-GLOBAL-NEXT: store <2 x i8> [[CALL6_I_I]], ptr addrspace(4) [[TMP1]], align 1 // CHECK-GLOBAL-NEXT: br label 
[[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1CCLM2ENS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEESS__EXIT]] // CHECK-GLOBAL: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1ccLm2ENS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_SP_NS0_4spanISQ_XT2_EEESS_.exit: // CHECK-GLOBAL-NEXT: ret void @@ -842,38 +862,38 @@ SYCL_EXTERNAL void test_accessor_iter_force_optimized(sycl::sub_group &sg, // CHECK-LOCAL-NEXT: entry: // CHECK-LOCAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT:%.*]], align 8, !tbaa [[TBAA101:![0-9]+]] // CHECK-LOCAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) -// CHECK-LOCAL-NEXT: [[CMP_I_I:%.*]] = icmp ne ptr addrspace(3) [[P:%.*]], null -// CHECK-LOCAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I]]) +// CHECK-LOCAL-NEXT: [[CMP_I_I_I:%.*]] = icmp ne ptr addrspace(3) [[P:%.*]], null +// CHECK-LOCAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I_I]]) // CHECK-LOCAL-NEXT: [[TMP2:%.*]] = ptrtoint ptr addrspace(3) [[P]] to i64 -// CHECK-LOCAL-NEXT: [[REM_I_I:%.*]] = and i64 [[TMP2]], 15 -// CHECK-LOCAL-NEXT: [[CMP_I20_I:%.*]] = icmp eq i64 [[REM_I_I]], 0 -// CHECK-LOCAL-NEXT: br i1 [[CMP_I20_I]], label [[IF_END_I:%.*]], label [[IF_THEN_I:%.*]] -// CHECK-LOCAL: if.then.i: +// CHECK-LOCAL-NEXT: [[REM_I_I_I:%.*]] = and i64 [[TMP2]], 
15 +// CHECK-LOCAL-NEXT: [[CMP_I15_I_I:%.*]] = icmp eq i64 [[REM_I_I_I]], 0 +// CHECK-LOCAL-NEXT: br i1 [[CMP_I15_I_I]], label [[IF_END_I_I:%.*]], label [[IF_THEN_I_I:%.*]] +// CHECK-LOCAL: if.then.i.i: // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR6]] // CHECK-LOCAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META103:![0-9]+]] // CHECK-LOCAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META106:![0-9]+]] -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I:%.*]] -// CHECK-LOCAL: for.cond.i.i: -// CHECK-LOCAL-NEXT: [[I_0_I_I:%.*]] = phi i32 [ 0, [[IF_THEN_I]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] -// CHECK-LOCAL-NEXT: [[CMP_I24_I:%.*]] = icmp samesign ult i32 [[I_0_I_I]], 2 -// CHECK-LOCAL-NEXT: br i1 [[CMP_I24_I]], label [[FOR_BODY_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS3CCLM2ENS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_9NAIVE_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEEST_SR_NS0_4SPANISS_XT2_EEESU__EXIT_I:%.*]] -// CHECK-LOCAL: for.body.i.i: -// CHECK-LOCAL-NEXT: [[CONV_I_I:%.*]] = zext nneg i32 [[I_0_I_I]] to i64 -// CHECK-LOCAL-NEXT: [[MUL_I_I_I:%.*]] = mul nuw nsw i32 [[TMP4]], [[I_0_I_I]] -// CHECK-LOCAL-NEXT: [[ADD_I_I_I:%.*]] = add i32 [[TMP3]], [[MUL_I_I_I]] -// CHECK-LOCAL-NEXT: [[IDXPROM_I_I:%.*]] = sext i32 [[ADD_I_I_I]] to i64 -// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[P]], i64 [[IDXPROM_I_I]] -// CHECK-LOCAL-NEXT: [[TMP5:%.*]] = load 
i8, ptr addrspace(3) [[ARRAYIDX_I_I]], align 1, !tbaa [[TBAA23]] -// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I]] -// CHECK-LOCAL-NEXT: store i8 [[TMP5]], ptr addrspace(4) [[ARRAYIDX_I_I_I]], align 1, !tbaa [[TBAA23]] -// CHECK-LOCAL-NEXT: [[INC_I_I]] = add nuw nsw i32 [[I_0_I_I]], 1 -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP109:![0-9]+]] -// CHECK-LOCAL: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS3ccLm2ENS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_9naive_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeEST_SR_NS0_4spanISS_XT2_EEESU_.exit.i: +// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] +// CHECK-LOCAL: for.cond.i.i.i: +// CHECK-LOCAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[IF_THEN_I_I]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] +// CHECK-LOCAL-NEXT: [[CMP_I19_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I_I]], 2 +// CHECK-LOCAL-NEXT: br i1 [[CMP_I19_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL6DETAIL15GROUP_LOAD_IMPLINS0_9SUB_GROUPEPU3AS3CCLM2ENS3_10PROPERTIESINS4_20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS4_9NAIVE_KEYEJEEENSB_INS4_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEEST_SR_NS0_4SPANISS_XT2_EEESU__EXIT_I_I:%.*]] +// CHECK-LOCAL: for.body.i.i.i: +// CHECK-LOCAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 +// CHECK-LOCAL-NEXT: 
[[MUL_I_I_I_I:%.*]] = mul nuw nsw i32 [[TMP4]], [[I_0_I_I_I]] +// CHECK-LOCAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[TMP3]], [[MUL_I_I_I_I]] +// CHECK-LOCAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 +// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[P]], i64 [[IDXPROM_I_I_I]] +// CHECK-LOCAL-NEXT: [[TMP5:%.*]] = load i8, ptr addrspace(3) [[ARRAYIDX_I_I_I]], align 1, !tbaa [[TBAA23]] +// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] +// CHECK-LOCAL-NEXT: store i8 [[TMP5]], ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 1, !tbaa [[TBAA23]] +// CHECK-LOCAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 +// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP109:![0-9]+]] +// CHECK-LOCAL: _ZN4sycl3_V13ext6oneapi12experimental6detail15group_load_implINS0_9sub_groupEPU3AS3ccLm2ENS3_10propertiesINS4_20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS4_9naive_keyEJEEENSB_INS4_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeEST_SR_NS0_4spanISS_XT2_EEESU_.exit.i.i: // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR6]] // CHECK-LOCAL-NEXT: br label 
[[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS3CCLM2ENS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEESS__EXIT:%.*]] -// CHECK-LOCAL: if.end.i: -// CHECK-LOCAL-NEXT: [[CALL5_I:%.*]] = tail call spir_func noundef <2 x i8> @_Z30__spirv_SubgroupBlockReadINTELIDv2_hET_PU3AS3Kh(ptr addrspace(3) noundef nonnull [[P]]) #[[ATTR6]] -// CHECK-LOCAL-NEXT: store <2 x i8> [[CALL5_I]], ptr addrspace(4) [[TMP1]], align 1 +// CHECK-LOCAL: if.end.i.i: +// CHECK-LOCAL-NEXT: [[CALL6_I_I:%.*]] = tail call spir_func noundef <2 x i8> @_Z30__spirv_SubgroupBlockReadINTELIDv2_hET_PU3AS3Kh(ptr addrspace(3) noundef nonnull [[P]]) #[[ATTR6]] +// CHECK-LOCAL-NEXT: store <2 x i8> [[CALL6_I_I]], ptr addrspace(4) [[TMP1]], align 1 // CHECK-LOCAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS3CCLM2ENS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEESS__EXIT]] // CHECK-LOCAL: 
_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS3ccLm2ENS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_SP_NS0_4spanISQ_XT2_EEESS_.exit: // CHECK-LOCAL-NEXT: ret void @@ -890,38 +910,38 @@ SYCL_EXTERNAL void test_runtime_align_check(sycl::sub_group &sg, // CHECK-GLOBAL-NEXT: entry: // CHECK-GLOBAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT:%.*]], align 8, !tbaa [[TBAA27]] // CHECK-GLOBAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) -// CHECK-GLOBAL-NEXT: [[CMP_I_I:%.*]] = icmp ne ptr addrspace(1) [[P:%.*]], null -// CHECK-GLOBAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I]]) +// CHECK-GLOBAL-NEXT: [[CMP_I_I_I:%.*]] = icmp ne ptr addrspace(1) [[P:%.*]], null +// CHECK-GLOBAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I_I]]) // CHECK-GLOBAL-NEXT: [[TMP2:%.*]] = ptrtoint ptr addrspace(1) [[P]] to i64 -// CHECK-GLOBAL-NEXT: [[REM_I_I:%.*]] = and i64 [[TMP2]], 3 -// CHECK-GLOBAL-NEXT: [[CMP_I20_I:%.*]] = icmp eq i64 [[REM_I_I]], 0 -// CHECK-GLOBAL-NEXT: br i1 [[CMP_I20_I]], label [[IF_END_I:%.*]], label [[IF_THEN_I:%.*]] -// CHECK-GLOBAL: if.then.i: +// CHECK-GLOBAL-NEXT: [[REM_I_I_I:%.*]] = and i64 [[TMP2]], 3 +// CHECK-GLOBAL-NEXT: [[CMP_I15_I_I:%.*]] = icmp eq i64 [[REM_I_I_I]], 0 +// CHECK-GLOBAL-NEXT: br i1 [[CMP_I15_I_I]], label [[IF_END_I_I:%.*]], label [[IF_THEN_I_I:%.*]] +// CHECK-GLOBAL: if.then.i.i: // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] -// CHECK-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META100:![0-9]+]] -// 
CHECK-GLOBAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META103:![0-9]+]] -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I:%.*]] -// CHECK-GLOBAL: for.cond.i.i: -// CHECK-GLOBAL-NEXT: [[I_0_I_I:%.*]] = phi i32 [ 0, [[IF_THEN_I]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] -// CHECK-GLOBAL-NEXT: [[CMP_I24_I:%.*]] = icmp samesign ult i32 [[I_0_I_I]], 4 -// CHECK-GLOBAL-NEXT: br i1 [[CMP_I24_I]], label [[FOR_BODY_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1SSLM4ENS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_9NAIVE_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEEST_SR_NS0_4SPANISS_XT2_EEESU__EXIT_I:%.*]] -// CHECK-GLOBAL: for.body.i.i: -// CHECK-GLOBAL-NEXT: [[CONV_I_I:%.*]] = zext nneg i32 [[I_0_I_I]] to i64 -// CHECK-GLOBAL-NEXT: [[MUL_I_I_I:%.*]] = mul i32 [[TMP4]], [[I_0_I_I]] -// CHECK-GLOBAL-NEXT: [[ADD_I_I_I:%.*]] = add i32 [[TMP3]], [[MUL_I_I_I]] -// CHECK-GLOBAL-NEXT: [[IDXPROM_I_I:%.*]] = sext i32 [[ADD_I_I_I]] to i64 -// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[P]], i64 [[IDXPROM_I_I]] -// CHECK-GLOBAL-NEXT: [[TMP5:%.*]] = load i16, ptr addrspace(1) [[ARRAYIDX_I_I]], align 2, !tbaa [[TBAA32]] -// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds nuw i16, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I]] -// CHECK-GLOBAL-NEXT: store i16 [[TMP5]], ptr addrspace(4) [[ARRAYIDX_I_I_I]], align 2, !tbaa [[TBAA32]] -// CHECK-GLOBAL-NEXT: [[INC_I_I]] = add nuw nsw i32 [[I_0_I_I]], 1 -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP106:![0-9]+]] -// CHECK-GLOBAL: 
_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1ssLm4ENS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_9naive_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeEST_SR_NS0_4spanISS_XT2_EEESU_.exit.i: +// CHECK-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META97:![0-9]+]] +// CHECK-GLOBAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META100:![0-9]+]] +// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] +// CHECK-GLOBAL: for.cond.i.i.i: +// CHECK-GLOBAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[IF_THEN_I_I]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] +// CHECK-GLOBAL-NEXT: [[CMP_I19_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I_I]], 4 +// CHECK-GLOBAL-NEXT: br i1 [[CMP_I19_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL6DETAIL15GROUP_LOAD_IMPLINS0_9SUB_GROUPEPU3AS1SSLM4ENS3_10PROPERTIESINS4_20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS4_9NAIVE_KEYEJEEENSB_INS4_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEEST_SR_NS0_4SPANISS_XT2_EEESU__EXIT_I_I:%.*]] +// CHECK-GLOBAL: for.body.i.i.i: +// CHECK-GLOBAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 +// CHECK-GLOBAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul i32 [[TMP4]], [[I_0_I_I_I]] +// CHECK-GLOBAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[TMP3]], [[MUL_I_I_I_I]] +// 
CHECK-GLOBAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 +// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[P]], i64 [[IDXPROM_I_I_I]] +// CHECK-GLOBAL-NEXT: [[TMP5:%.*]] = load i16, ptr addrspace(1) [[ARRAYIDX_I_I_I]], align 2, !tbaa [[TBAA32]] +// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i16, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] +// CHECK-GLOBAL-NEXT: store i16 [[TMP5]], ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 2, !tbaa [[TBAA32]] +// CHECK-GLOBAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 +// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP103:![0-9]+]] +// CHECK-GLOBAL: _ZN4sycl3_V13ext6oneapi12experimental6detail15group_load_implINS0_9sub_groupEPU3AS1ssLm4ENS3_10propertiesINS4_20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS4_9naive_keyEJEEENSB_INS4_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeEST_SR_NS0_4spanISS_XT2_EEESU_.exit.i.i: // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // CHECK-GLOBAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1SSLM4ENS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEESS__EXIT:%.*]] -// CHECK-GLOBAL: if.end.i: -// CHECK-GLOBAL-NEXT: [[CALL5_I:%.*]] = tail call spir_func noundef <4 x i16> 
@_Z30__spirv_SubgroupBlockReadINTELIDv4_tET_PU3AS1Kt(ptr addrspace(1) noundef nonnull [[P]]) #[[ATTR7]] -// CHECK-GLOBAL-NEXT: store <4 x i16> [[CALL5_I]], ptr addrspace(4) [[TMP1]], align 2 +// CHECK-GLOBAL: if.end.i.i: +// CHECK-GLOBAL-NEXT: [[CALL6_I_I:%.*]] = tail call spir_func noundef <4 x i16> @_Z30__spirv_SubgroupBlockReadINTELIDv4_tET_PU3AS1Kt(ptr addrspace(1) noundef nonnull [[P]]) #[[ATTR7]] +// CHECK-GLOBAL-NEXT: store <4 x i16> [[CALL6_I_I]], ptr addrspace(4) [[TMP1]], align 2 // CHECK-GLOBAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1SSLM4ENS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEESS__EXIT]] // CHECK-GLOBAL: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1ssLm4ENS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_SP_NS0_4spanISQ_XT2_EEESS_.exit: // CHECK-GLOBAL-NEXT: ret void @@ -930,38 +950,38 @@ SYCL_EXTERNAL void test_runtime_align_check(sycl::sub_group &sg, // CHECK-LOCAL-NEXT: entry: // CHECK-LOCAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT:%.*]], align 8, !tbaa [[TBAA25]] // CHECK-LOCAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) -// CHECK-LOCAL-NEXT: [[CMP_I_I:%.*]] = icmp ne ptr addrspace(3) [[P:%.*]], null -// CHECK-LOCAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I]]) +// CHECK-LOCAL-NEXT: 
[[CMP_I_I_I:%.*]] = icmp ne ptr addrspace(3) [[P:%.*]], null +// CHECK-LOCAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I_I]]) // CHECK-LOCAL-NEXT: [[TMP2:%.*]] = ptrtoint ptr addrspace(3) [[P]] to i64 -// CHECK-LOCAL-NEXT: [[REM_I_I:%.*]] = and i64 [[TMP2]], 15 -// CHECK-LOCAL-NEXT: [[CMP_I20_I:%.*]] = icmp eq i64 [[REM_I_I]], 0 -// CHECK-LOCAL-NEXT: br i1 [[CMP_I20_I]], label [[IF_END_I:%.*]], label [[IF_THEN_I:%.*]] -// CHECK-LOCAL: if.then.i: +// CHECK-LOCAL-NEXT: [[REM_I_I_I:%.*]] = and i64 [[TMP2]], 15 +// CHECK-LOCAL-NEXT: [[CMP_I15_I_I:%.*]] = icmp eq i64 [[REM_I_I_I]], 0 +// CHECK-LOCAL-NEXT: br i1 [[CMP_I15_I_I]], label [[IF_END_I_I:%.*]], label [[IF_THEN_I_I:%.*]] +// CHECK-LOCAL: if.then.i.i: // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR6]] // CHECK-LOCAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META111:![0-9]+]] // CHECK-LOCAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META114:![0-9]+]] -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I:%.*]] -// CHECK-LOCAL: for.cond.i.i: -// CHECK-LOCAL-NEXT: [[I_0_I_I:%.*]] = phi i32 [ 0, [[IF_THEN_I]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] -// CHECK-LOCAL-NEXT: [[CMP_I24_I:%.*]] = icmp samesign ult i32 [[I_0_I_I]], 4 -// CHECK-LOCAL-NEXT: br i1 [[CMP_I24_I]], label [[FOR_BODY_I_I]], label 
[[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS3SSLM4ENS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_9NAIVE_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEEST_SR_NS0_4SPANISS_XT2_EEESU__EXIT_I:%.*]] -// CHECK-LOCAL: for.body.i.i: -// CHECK-LOCAL-NEXT: [[CONV_I_I:%.*]] = zext nneg i32 [[I_0_I_I]] to i64 -// CHECK-LOCAL-NEXT: [[MUL_I_I_I:%.*]] = mul i32 [[TMP4]], [[I_0_I_I]] -// CHECK-LOCAL-NEXT: [[ADD_I_I_I:%.*]] = add i32 [[TMP3]], [[MUL_I_I_I]] -// CHECK-LOCAL-NEXT: [[IDXPROM_I_I:%.*]] = sext i32 [[ADD_I_I_I]] to i64 -// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i16, ptr addrspace(3) [[P]], i64 [[IDXPROM_I_I]] -// CHECK-LOCAL-NEXT: [[TMP5:%.*]] = load i16, ptr addrspace(3) [[ARRAYIDX_I_I]], align 2, !tbaa [[TBAA30]] -// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds nuw i16, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I]] -// CHECK-LOCAL-NEXT: store i16 [[TMP5]], ptr addrspace(4) [[ARRAYIDX_I_I_I]], align 2, !tbaa [[TBAA30]] -// CHECK-LOCAL-NEXT: [[INC_I_I]] = add nuw nsw i32 [[I_0_I_I]], 1 -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP117:![0-9]+]] -// CHECK-LOCAL: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS3ssLm4ENS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_9naive_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeEST_SR_NS0_4spanISS_XT2_EEESU_.exit.i: +// CHECK-LOCAL-NEXT: br label 
[[FOR_COND_I_I_I:%.*]] +// CHECK-LOCAL: for.cond.i.i.i: +// CHECK-LOCAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[IF_THEN_I_I]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] +// CHECK-LOCAL-NEXT: [[CMP_I19_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I_I]], 4 +// CHECK-LOCAL-NEXT: br i1 [[CMP_I19_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL6DETAIL15GROUP_LOAD_IMPLINS0_9SUB_GROUPEPU3AS3SSLM4ENS3_10PROPERTIESINS4_20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS4_9NAIVE_KEYEJEEENSB_INS4_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEEST_SR_NS0_4SPANISS_XT2_EEESU__EXIT_I_I:%.*]] +// CHECK-LOCAL: for.body.i.i.i: +// CHECK-LOCAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 +// CHECK-LOCAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul i32 [[TMP4]], [[I_0_I_I_I]] +// CHECK-LOCAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[TMP3]], [[MUL_I_I_I_I]] +// CHECK-LOCAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 +// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i16, ptr addrspace(3) [[P]], i64 [[IDXPROM_I_I_I]] +// CHECK-LOCAL-NEXT: [[TMP5:%.*]] = load i16, ptr addrspace(3) [[ARRAYIDX_I_I_I]], align 2, !tbaa [[TBAA30]] +// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i16, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] +// CHECK-LOCAL-NEXT: store i16 [[TMP5]], ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 2, !tbaa [[TBAA30]] +// CHECK-LOCAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 +// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP117:![0-9]+]] +// CHECK-LOCAL: 
_ZN4sycl3_V13ext6oneapi12experimental6detail15group_load_implINS0_9sub_groupEPU3AS3ssLm4ENS3_10propertiesINS4_20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS4_9naive_keyEJEEENSB_INS4_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeEST_SR_NS0_4spanISS_XT2_EEESU_.exit.i.i: // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR6]] // CHECK-LOCAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS3SSLM4ENS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEESS__EXIT:%.*]] -// CHECK-LOCAL: if.end.i: -// CHECK-LOCAL-NEXT: [[CALL5_I:%.*]] = tail call spir_func noundef <4 x i16> @_Z30__spirv_SubgroupBlockReadINTELIDv4_tET_PU3AS3Kt(ptr addrspace(3) noundef nonnull [[P]]) #[[ATTR6]] -// CHECK-LOCAL-NEXT: store <4 x i16> [[CALL5_I]], ptr addrspace(4) [[TMP1]], align 2 +// CHECK-LOCAL: if.end.i.i: +// CHECK-LOCAL-NEXT: [[CALL6_I_I:%.*]] = tail call spir_func noundef <4 x i16> @_Z30__spirv_SubgroupBlockReadINTELIDv4_tET_PU3AS3Kt(ptr addrspace(3) noundef nonnull [[P]]) #[[ATTR6]] +// CHECK-LOCAL-NEXT: store <4 x i16> [[CALL6_I_I]], ptr addrspace(4) [[TMP1]], align 2 // CHECK-LOCAL-NEXT: br label 
[[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS3SSLM4ENS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEESS__EXIT]] // CHECK-LOCAL: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS3ssLm4ENS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_SP_NS0_4spanISQ_XT2_EEESS_.exit: // CHECK-LOCAL-NEXT: ret void @@ -977,38 +997,38 @@ SYCL_EXTERNAL void test_four_shorts(sycl::sub_group &sg, plain_ptr p, // CHECK-GLOBAL-NEXT: entry: // CHECK-GLOBAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT:%.*]], align 8, !tbaa [[TBAA27]] // CHECK-GLOBAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) -// CHECK-GLOBAL-NEXT: [[CMP_I_I:%.*]] = icmp ne ptr addrspace(1) [[P:%.*]], null -// CHECK-GLOBAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I]]) +// CHECK-GLOBAL-NEXT: [[CMP_I_I_I:%.*]] = icmp ne ptr addrspace(1) [[P:%.*]], null +// CHECK-GLOBAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I_I]]) // CHECK-GLOBAL-NEXT: [[TMP2:%.*]] = ptrtoint ptr addrspace(1) [[P]] to i64 -// CHECK-GLOBAL-NEXT: [[REM_I_I:%.*]] = and i64 [[TMP2]], 3 -// CHECK-GLOBAL-NEXT: [[CMP_I20_I:%.*]] = icmp eq i64 [[REM_I_I]], 0 -// CHECK-GLOBAL-NEXT: br i1 [[CMP_I20_I]], label [[IF_END_I:%.*]], label [[IF_THEN_I:%.*]] -// CHECK-GLOBAL: if.then.i: +// CHECK-GLOBAL-NEXT: [[REM_I_I_I:%.*]] = and i64 [[TMP2]], 3 +// 
CHECK-GLOBAL-NEXT: [[CMP_I15_I_I:%.*]] = icmp eq i64 [[REM_I_I_I]], 0 +// CHECK-GLOBAL-NEXT: br i1 [[CMP_I15_I_I]], label [[IF_END_I_I:%.*]], label [[IF_THEN_I_I:%.*]] +// CHECK-GLOBAL: if.then.i.i: // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] -// CHECK-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META108:![0-9]+]] -// CHECK-GLOBAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META111:![0-9]+]] -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I:%.*]] -// CHECK-GLOBAL: for.cond.i.i: -// CHECK-GLOBAL-NEXT: [[I_0_I_I:%.*]] = phi i32 [ 0, [[IF_THEN_I]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] -// CHECK-GLOBAL-NEXT: [[CMP_I24_I:%.*]] = icmp samesign ult i32 [[I_0_I_I]], 16 -// CHECK-GLOBAL-NEXT: br i1 [[CMP_I24_I]], label [[FOR_BODY_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1SSLM16ENS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_9NAIVE_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEEST_SR_NS0_4SPANISS_XT2_EEESU__EXIT_I:%.*]] -// CHECK-GLOBAL: for.body.i.i: -// CHECK-GLOBAL-NEXT: [[CONV_I_I:%.*]] = zext nneg i32 [[I_0_I_I]] to i64 -// CHECK-GLOBAL-NEXT: [[MUL_I_I_I:%.*]] = mul i32 [[TMP4]], [[I_0_I_I]] -// CHECK-GLOBAL-NEXT: [[ADD_I_I_I:%.*]] = add i32 [[TMP3]], [[MUL_I_I_I]] -// CHECK-GLOBAL-NEXT: [[IDXPROM_I_I:%.*]] = sext i32 [[ADD_I_I_I]] to i64 -// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[P]], i64 [[IDXPROM_I_I]] -// CHECK-GLOBAL-NEXT: 
[[TMP5:%.*]] = load i16, ptr addrspace(1) [[ARRAYIDX_I_I]], align 2, !tbaa [[TBAA32]] -// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds nuw i16, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I]] -// CHECK-GLOBAL-NEXT: store i16 [[TMP5]], ptr addrspace(4) [[ARRAYIDX_I_I_I]], align 2, !tbaa [[TBAA32]] -// CHECK-GLOBAL-NEXT: [[INC_I_I]] = add nuw nsw i32 [[I_0_I_I]], 1 -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP114:![0-9]+]] -// CHECK-GLOBAL: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1ssLm16ENS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_9naive_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeEST_SR_NS0_4spanISS_XT2_EEESU_.exit.i: +// CHECK-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META105:![0-9]+]] +// CHECK-GLOBAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META108:![0-9]+]] +// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] +// CHECK-GLOBAL: for.cond.i.i.i: +// CHECK-GLOBAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[IF_THEN_I_I]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] +// CHECK-GLOBAL-NEXT: [[CMP_I19_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I_I]], 16 +// CHECK-GLOBAL-NEXT: br i1 [[CMP_I19_I_I]], label [[FOR_BODY_I_I_I]], label 
[[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL6DETAIL15GROUP_LOAD_IMPLINS0_9SUB_GROUPEPU3AS1SSLM16ENS3_10PROPERTIESINS4_20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS4_9NAIVE_KEYEJEEENSB_INS4_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEEST_SR_NS0_4SPANISS_XT2_EEESU__EXIT_I_I:%.*]] +// CHECK-GLOBAL: for.body.i.i.i: +// CHECK-GLOBAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 +// CHECK-GLOBAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul i32 [[TMP4]], [[I_0_I_I_I]] +// CHECK-GLOBAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[TMP3]], [[MUL_I_I_I_I]] +// CHECK-GLOBAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 +// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[P]], i64 [[IDXPROM_I_I_I]] +// CHECK-GLOBAL-NEXT: [[TMP5:%.*]] = load i16, ptr addrspace(1) [[ARRAYIDX_I_I_I]], align 2, !tbaa [[TBAA32]] +// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i16, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] +// CHECK-GLOBAL-NEXT: store i16 [[TMP5]], ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 2, !tbaa [[TBAA32]] +// CHECK-GLOBAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 +// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP111:![0-9]+]] +// CHECK-GLOBAL: 
_ZN4sycl3_V13ext6oneapi12experimental6detail15group_load_implINS0_9sub_groupEPU3AS1ssLm16ENS3_10propertiesINS4_20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS4_9naive_keyEJEEENSB_INS4_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeEST_SR_NS0_4spanISS_XT2_EEESU_.exit.i.i: // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // CHECK-GLOBAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1SSLM16ENS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEESS__EXIT:%.*]] -// CHECK-GLOBAL: if.end.i: -// CHECK-GLOBAL-NEXT: [[CALL5_I:%.*]] = tail call spir_func noundef <16 x i16> @_Z30__spirv_SubgroupBlockReadINTELIDv16_tET_PU3AS1Kt(ptr addrspace(1) noundef nonnull [[P]]) #[[ATTR7]] -// CHECK-GLOBAL-NEXT: store <16 x i16> [[CALL5_I]], ptr addrspace(4) [[TMP1]], align 2 +// CHECK-GLOBAL: if.end.i.i: +// CHECK-GLOBAL-NEXT: [[CALL6_I_I:%.*]] = tail call spir_func noundef <16 x i16> @_Z30__spirv_SubgroupBlockReadINTELIDv16_tET_PU3AS1Kt(ptr addrspace(1) noundef nonnull [[P]]) #[[ATTR7]] +// CHECK-GLOBAL-NEXT: store <16 x i16> [[CALL6_I_I]], ptr addrspace(4) [[TMP1]], align 2 // CHECK-GLOBAL-NEXT: br label 
[[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1SSLM16ENS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEESS__EXIT]] // CHECK-GLOBAL: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1ssLm16ENS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_SP_NS0_4spanISQ_XT2_EEESS_.exit: // CHECK-GLOBAL-NEXT: ret void @@ -1017,38 +1037,38 @@ SYCL_EXTERNAL void test_four_shorts(sycl::sub_group &sg, plain_ptr p, // CHECK-LOCAL-NEXT: entry: // CHECK-LOCAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT:%.*]], align 8, !tbaa [[TBAA25]] // CHECK-LOCAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) -// CHECK-LOCAL-NEXT: [[CMP_I_I:%.*]] = icmp ne ptr addrspace(3) [[P:%.*]], null -// CHECK-LOCAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I]]) +// CHECK-LOCAL-NEXT: [[CMP_I_I_I:%.*]] = icmp ne ptr addrspace(3) [[P:%.*]], null +// CHECK-LOCAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I_I]]) // CHECK-LOCAL-NEXT: [[TMP2:%.*]] = ptrtoint ptr addrspace(3) [[P]] to i64 -// CHECK-LOCAL-NEXT: [[REM_I_I:%.*]] = and i64 [[TMP2]], 15 -// CHECK-LOCAL-NEXT: [[CMP_I20_I:%.*]] = icmp eq i64 [[REM_I_I]], 0 -// CHECK-LOCAL-NEXT: br i1 [[CMP_I20_I]], label [[IF_END_I:%.*]], label [[IF_THEN_I:%.*]] -// CHECK-LOCAL: if.then.i: +// CHECK-LOCAL-NEXT: [[REM_I_I_I:%.*]] = and i64 [[TMP2]], 15 +// 
CHECK-LOCAL-NEXT: [[CMP_I15_I_I:%.*]] = icmp eq i64 [[REM_I_I_I]], 0 +// CHECK-LOCAL-NEXT: br i1 [[CMP_I15_I_I]], label [[IF_END_I_I:%.*]], label [[IF_THEN_I_I:%.*]] +// CHECK-LOCAL: if.then.i.i: // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR6]] // CHECK-LOCAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META119:![0-9]+]] // CHECK-LOCAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META122:![0-9]+]] -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I:%.*]] -// CHECK-LOCAL: for.cond.i.i: -// CHECK-LOCAL-NEXT: [[I_0_I_I:%.*]] = phi i32 [ 0, [[IF_THEN_I]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] -// CHECK-LOCAL-NEXT: [[CMP_I24_I:%.*]] = icmp samesign ult i32 [[I_0_I_I]], 16 -// CHECK-LOCAL-NEXT: br i1 [[CMP_I24_I]], label [[FOR_BODY_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS3SSLM16ENS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_9NAIVE_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEEST_SR_NS0_4SPANISS_XT2_EEESU__EXIT_I:%.*]] -// CHECK-LOCAL: for.body.i.i: -// CHECK-LOCAL-NEXT: [[CONV_I_I:%.*]] = zext nneg i32 [[I_0_I_I]] to i64 -// CHECK-LOCAL-NEXT: [[MUL_I_I_I:%.*]] = mul i32 [[TMP4]], [[I_0_I_I]] -// CHECK-LOCAL-NEXT: [[ADD_I_I_I:%.*]] = add i32 [[TMP3]], [[MUL_I_I_I]] -// CHECK-LOCAL-NEXT: [[IDXPROM_I_I:%.*]] = sext i32 [[ADD_I_I_I]] to i64 -// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i16, ptr addrspace(3) [[P]], i64 [[IDXPROM_I_I]] -// CHECK-LOCAL-NEXT: [[TMP5:%.*]] = load i16, ptr 
addrspace(3) [[ARRAYIDX_I_I]], align 2, !tbaa [[TBAA30]] -// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds nuw i16, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I]] -// CHECK-LOCAL-NEXT: store i16 [[TMP5]], ptr addrspace(4) [[ARRAYIDX_I_I_I]], align 2, !tbaa [[TBAA30]] -// CHECK-LOCAL-NEXT: [[INC_I_I]] = add nuw nsw i32 [[I_0_I_I]], 1 -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP125:![0-9]+]] -// CHECK-LOCAL: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS3ssLm16ENS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_9naive_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeEST_SR_NS0_4spanISS_XT2_EEESU_.exit.i: +// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] +// CHECK-LOCAL: for.cond.i.i.i: +// CHECK-LOCAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[IF_THEN_I_I]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] +// CHECK-LOCAL-NEXT: [[CMP_I19_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I_I]], 16 +// CHECK-LOCAL-NEXT: br i1 [[CMP_I19_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL6DETAIL15GROUP_LOAD_IMPLINS0_9SUB_GROUPEPU3AS3SSLM16ENS3_10PROPERTIESINS4_20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS4_9NAIVE_KEYEJEEENSB_INS4_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEEST_SR_NS0_4SPANISS_XT2_EEESU__EXIT_I_I:%.*]] +// CHECK-LOCAL: for.body.i.i.i: +// CHECK-LOCAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 +// CHECK-LOCAL-NEXT: 
[[MUL_I_I_I_I:%.*]] = mul i32 [[TMP4]], [[I_0_I_I_I]] +// CHECK-LOCAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[TMP3]], [[MUL_I_I_I_I]] +// CHECK-LOCAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 +// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i16, ptr addrspace(3) [[P]], i64 [[IDXPROM_I_I_I]] +// CHECK-LOCAL-NEXT: [[TMP5:%.*]] = load i16, ptr addrspace(3) [[ARRAYIDX_I_I_I]], align 2, !tbaa [[TBAA30]] +// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i16, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] +// CHECK-LOCAL-NEXT: store i16 [[TMP5]], ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 2, !tbaa [[TBAA30]] +// CHECK-LOCAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 +// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP125:![0-9]+]] +// CHECK-LOCAL: _ZN4sycl3_V13ext6oneapi12experimental6detail15group_load_implINS0_9sub_groupEPU3AS3ssLm16ENS3_10propertiesINS4_20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS4_9naive_keyEJEEENSB_INS4_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeEST_SR_NS0_4spanISS_XT2_EEESU_.exit.i.i: // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR6]] // CHECK-LOCAL-NEXT: br label 
[[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS3SSLM16ENS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEESS__EXIT:%.*]] -// CHECK-LOCAL: if.end.i: -// CHECK-LOCAL-NEXT: [[CALL5_I:%.*]] = tail call spir_func noundef <16 x i16> @_Z30__spirv_SubgroupBlockReadINTELIDv16_tET_PU3AS3Kt(ptr addrspace(3) noundef nonnull [[P]]) #[[ATTR6]] -// CHECK-LOCAL-NEXT: store <16 x i16> [[CALL5_I]], ptr addrspace(4) [[TMP1]], align 2 +// CHECK-LOCAL: if.end.i.i: +// CHECK-LOCAL-NEXT: [[CALL6_I_I:%.*]] = tail call spir_func noundef <16 x i16> @_Z30__spirv_SubgroupBlockReadINTELIDv16_tET_PU3AS3Kt(ptr addrspace(3) noundef nonnull [[P]]) #[[ATTR6]] +// CHECK-LOCAL-NEXT: store <16 x i16> [[CALL6_I_I]], ptr addrspace(4) [[TMP1]], align 2 // CHECK-LOCAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS3SSLM16ENS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEESS__EXIT]] // CHECK-LOCAL: 
_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS3ssLm16ENS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_SP_NS0_4spanISQ_XT2_EEESS_.exit: // CHECK-LOCAL-NEXT: ret void @@ -1063,24 +1083,24 @@ SYCL_EXTERNAL void test_sixteen_shorts(sycl::sub_group &sg, plain_ptr p, // CHECK-GLOBAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT:%.*]], align 8, !tbaa [[TBAA15]] // CHECK-GLOBAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] -// CHECK-GLOBAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META116:![0-9]+]] -// CHECK-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META119:![0-9]+]] -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I:%.*]] -// CHECK-GLOBAL: for.cond.i.i: -// CHECK-GLOBAL-NEXT: [[I_0_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] -// CHECK-GLOBAL-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I]], 3 -// CHECK-GLOBAL-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label 
[[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1IILM3ENS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEESS__EXIT:%.*]] -// CHECK-GLOBAL: for.body.i.i: -// CHECK-GLOBAL-NEXT: [[CONV_I_I:%.*]] = zext nneg i32 [[I_0_I_I]] to i64 -// CHECK-GLOBAL-NEXT: [[MUL_I_I_I:%.*]] = mul i32 [[TMP3]], [[I_0_I_I]] -// CHECK-GLOBAL-NEXT: [[ADD_I_I_I:%.*]] = add i32 [[TMP2]], [[MUL_I_I_I]] -// CHECK-GLOBAL-NEXT: [[IDXPROM_I_I:%.*]] = sext i32 [[ADD_I_I_I]] to i64 -// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[P:%.*]], i64 [[IDXPROM_I_I]] -// CHECK-GLOBAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I]] -// CHECK-GLOBAL-NEXT: store i32 [[TMP4]], ptr addrspace(4) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-GLOBAL-NEXT: [[INC_I_I]] = add nuw nsw i32 [[I_0_I_I]], 1 -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP122:![0-9]+]] +// CHECK-GLOBAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META113:![0-9]+]] +// CHECK-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META116:![0-9]+]] +// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] +// CHECK-GLOBAL: for.cond.i.i.i: +// CHECK-GLOBAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] +// CHECK-GLOBAL-NEXT: [[CMP_I_I_I:%.*]] = icmp 
samesign ult i32 [[I_0_I_I_I]], 3 +// CHECK-GLOBAL-NEXT: br i1 [[CMP_I_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1IILM3ENS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEESS__EXIT:%.*]] +// CHECK-GLOBAL: for.body.i.i.i: +// CHECK-GLOBAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 +// CHECK-GLOBAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul i32 [[TMP3]], [[I_0_I_I_I]] +// CHECK-GLOBAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[TMP2]], [[MUL_I_I_I_I]] +// CHECK-GLOBAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 +// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[P:%.*]], i64 [[IDXPROM_I_I_I]] +// CHECK-GLOBAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] +// CHECK-GLOBAL-NEXT: store i32 [[TMP4]], ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-GLOBAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 +// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP119:![0-9]+]] // CHECK-GLOBAL: 
_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiLm3ENS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_SP_NS0_4spanISQ_XT2_EEESS_.exit: // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // CHECK-GLOBAL-NEXT: ret void @@ -1092,22 +1112,22 @@ SYCL_EXTERNAL void test_sixteen_shorts(sycl::sub_group &sg, plain_ptr p, // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR6]] // CHECK-LOCAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META127:![0-9]+]] // CHECK-LOCAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META130:![0-9]+]] -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I:%.*]] -// CHECK-LOCAL: for.cond.i.i: -// CHECK-LOCAL-NEXT: [[I_0_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] -// CHECK-LOCAL-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I]], 3 -// CHECK-LOCAL-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label 
[[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS3IILM3ENS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEESS__EXIT:%.*]] -// CHECK-LOCAL: for.body.i.i: -// CHECK-LOCAL-NEXT: [[CONV_I_I:%.*]] = zext nneg i32 [[I_0_I_I]] to i64 -// CHECK-LOCAL-NEXT: [[MUL_I_I_I:%.*]] = mul i32 [[TMP3]], [[I_0_I_I]] -// CHECK-LOCAL-NEXT: [[ADD_I_I_I:%.*]] = add i32 [[TMP2]], [[MUL_I_I_I]] -// CHECK-LOCAL-NEXT: [[IDXPROM_I_I:%.*]] = sext i32 [[ADD_I_I_I]] to i64 -// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[P:%.*]], i64 [[IDXPROM_I_I]] -// CHECK-LOCAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(3) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I]] -// CHECK-LOCAL-NEXT: store i32 [[TMP4]], ptr addrspace(4) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-LOCAL-NEXT: [[INC_I_I]] = add nuw nsw i32 [[I_0_I_I]], 1 -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP133:![0-9]+]] +// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] +// CHECK-LOCAL: for.cond.i.i.i: +// CHECK-LOCAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] +// CHECK-LOCAL-NEXT: [[CMP_I_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I_I]], 3 +// CHECK-LOCAL-NEXT: br i1 [[CMP_I_I_I]], label [[FOR_BODY_I_I_I]], label 
[[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS3IILM3ENS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEESS__EXIT:%.*]] +// CHECK-LOCAL: for.body.i.i.i: +// CHECK-LOCAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 +// CHECK-LOCAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul i32 [[TMP3]], [[I_0_I_I_I]] +// CHECK-LOCAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[TMP2]], [[MUL_I_I_I_I]] +// CHECK-LOCAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 +// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[P:%.*]], i64 [[IDXPROM_I_I_I]] +// CHECK-LOCAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(3) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] +// CHECK-LOCAL-NEXT: store i32 [[TMP4]], ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-LOCAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 +// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP133:![0-9]+]] // CHECK-LOCAL: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS3iiLm3ENS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_SP_NS0_4spanISQ_XT2_EEESS_.exit: // CHECK-LOCAL-NEXT: tail call spir_func void 
@_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR6]] // CHECK-LOCAL-NEXT: ret void @@ -1123,24 +1143,24 @@ SYCL_EXTERNAL void test_non_power_of_two(sycl::sub_group &sg, plain_ptr p, // CHECK-GLOBAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT:%.*]], align 8, !tbaa [[TBAA15]] // CHECK-GLOBAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] -// CHECK-GLOBAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META124:![0-9]+]] -// CHECK-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META127:![0-9]+]] -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I:%.*]] -// CHECK-GLOBAL: for.cond.i.i: -// CHECK-GLOBAL-NEXT: [[I_0_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] -// CHECK-GLOBAL-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I]], 16 -// CHECK-GLOBAL-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1IILM16ENS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEESS__EXIT:%.*]] -// CHECK-GLOBAL: for.body.i.i: -// CHECK-GLOBAL-NEXT: [[CONV_I_I:%.*]] = zext nneg i32 [[I_0_I_I]] to i64 -// CHECK-GLOBAL-NEXT: [[MUL_I_I_I:%.*]] = mul i32 [[TMP3]], [[I_0_I_I]] -// CHECK-GLOBAL-NEXT: [[ADD_I_I_I:%.*]] = add i32 [[TMP2]], [[MUL_I_I_I]] -// CHECK-GLOBAL-NEXT: [[IDXPROM_I_I:%.*]] = sext i32 [[ADD_I_I_I]] 
to i64 -// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[P:%.*]], i64 [[IDXPROM_I_I]] -// CHECK-GLOBAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I]] -// CHECK-GLOBAL-NEXT: store i32 [[TMP4]], ptr addrspace(4) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-GLOBAL-NEXT: [[INC_I_I]] = add nuw nsw i32 [[I_0_I_I]], 1 -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP130:![0-9]+]] +// CHECK-GLOBAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META121:![0-9]+]] +// CHECK-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META124:![0-9]+]] +// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] +// CHECK-GLOBAL: for.cond.i.i.i: +// CHECK-GLOBAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] +// CHECK-GLOBAL-NEXT: [[CMP_I_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I_I]], 16 +// CHECK-GLOBAL-NEXT: br i1 [[CMP_I_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1IILM16ENS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEESS__EXIT:%.*]] +// CHECK-GLOBAL: for.body.i.i.i: +// CHECK-GLOBAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 +// CHECK-GLOBAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul i32 [[TMP3]], [[I_0_I_I_I]] +// CHECK-GLOBAL-NEXT: 
[[ADD_I_I_I_I:%.*]] = add i32 [[TMP2]], [[MUL_I_I_I_I]] +// CHECK-GLOBAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 +// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[P:%.*]], i64 [[IDXPROM_I_I_I]] +// CHECK-GLOBAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] +// CHECK-GLOBAL-NEXT: store i32 [[TMP4]], ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-GLOBAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 +// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP127:![0-9]+]] // CHECK-GLOBAL: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiLm16ENS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_SP_NS0_4spanISQ_XT2_EEESS_.exit: // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // CHECK-GLOBAL-NEXT: ret void @@ -1152,22 +1172,22 @@ SYCL_EXTERNAL void test_non_power_of_two(sycl::sub_group &sg, plain_ptr p, // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR6]] // CHECK-LOCAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META135:![0-9]+]] // CHECK-LOCAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META138:![0-9]+]] -// CHECK-LOCAL-NEXT: br 
label [[FOR_COND_I_I:%.*]] -// CHECK-LOCAL: for.cond.i.i: -// CHECK-LOCAL-NEXT: [[I_0_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] -// CHECK-LOCAL-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I]], 16 -// CHECK-LOCAL-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS3IILM16ENS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEESS__EXIT:%.*]] -// CHECK-LOCAL: for.body.i.i: -// CHECK-LOCAL-NEXT: [[CONV_I_I:%.*]] = zext nneg i32 [[I_0_I_I]] to i64 -// CHECK-LOCAL-NEXT: [[MUL_I_I_I:%.*]] = mul i32 [[TMP3]], [[I_0_I_I]] -// CHECK-LOCAL-NEXT: [[ADD_I_I_I:%.*]] = add i32 [[TMP2]], [[MUL_I_I_I]] -// CHECK-LOCAL-NEXT: [[IDXPROM_I_I:%.*]] = sext i32 [[ADD_I_I_I]] to i64 -// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[P:%.*]], i64 [[IDXPROM_I_I]] -// CHECK-LOCAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(3) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I]] -// CHECK-LOCAL-NEXT: store i32 [[TMP4]], ptr addrspace(4) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-LOCAL-NEXT: [[INC_I_I]] = add nuw nsw i32 [[I_0_I_I]], 1 -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP141:![0-9]+]] +// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] +// CHECK-LOCAL: for.cond.i.i.i: +// CHECK-LOCAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] +// CHECK-LOCAL-NEXT: [[CMP_I_I_I:%.*]] = icmp samesign 
ult i32 [[I_0_I_I_I]], 16 +// CHECK-LOCAL-NEXT: br i1 [[CMP_I_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS3IILM16ENS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEESS__EXIT:%.*]] +// CHECK-LOCAL: for.body.i.i.i: +// CHECK-LOCAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 +// CHECK-LOCAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul i32 [[TMP3]], [[I_0_I_I_I]] +// CHECK-LOCAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[TMP2]], [[MUL_I_I_I_I]] +// CHECK-LOCAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 +// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[P:%.*]], i64 [[IDXPROM_I_I_I]] +// CHECK-LOCAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(3) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] +// CHECK-LOCAL-NEXT: store i32 [[TMP4]], ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-LOCAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 +// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP141:![0-9]+]] // CHECK-LOCAL: 
_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS3iiLm16ENS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_SP_NS0_4spanISQ_XT2_EEESS_.exit: // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR6]] // CHECK-LOCAL-NEXT: ret void @@ -1184,24 +1204,24 @@ SYCL_EXTERNAL void test_sixteen_ints(sycl::sub_group &sg, plain_ptr p, // CHECK-GLOBAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[OUT:%.*]], align 8, !tbaa [[TBAA15]] // CHECK-GLOBAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] -// CHECK-GLOBAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META132:![0-9]+]] -// CHECK-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META135:![0-9]+]] -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I:%.*]] -// CHECK-GLOBAL: for.cond.i.i: -// CHECK-GLOBAL-NEXT: [[I_0_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] -// CHECK-GLOBAL-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I]], 11 -// CHECK-GLOBAL-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label 
[[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1IILM11ENS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEESS__EXIT:%.*]] -// CHECK-GLOBAL: for.body.i.i: -// CHECK-GLOBAL-NEXT: [[CONV_I_I:%.*]] = zext nneg i32 [[I_0_I_I]] to i64 -// CHECK-GLOBAL-NEXT: [[MUL_I_I_I:%.*]] = mul i32 [[TMP3]], [[I_0_I_I]] -// CHECK-GLOBAL-NEXT: [[ADD_I_I_I:%.*]] = add i32 [[TMP2]], [[MUL_I_I_I]] -// CHECK-GLOBAL-NEXT: [[IDXPROM_I_I:%.*]] = sext i32 [[ADD_I_I_I]] to i64 -// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[P:%.*]], i64 [[IDXPROM_I_I]] -// CHECK-GLOBAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I]] -// CHECK-GLOBAL-NEXT: store i32 [[TMP4]], ptr addrspace(4) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-GLOBAL-NEXT: [[INC_I_I]] = add nuw nsw i32 [[I_0_I_I]], 1 -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP138:![0-9]+]] +// CHECK-GLOBAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META129:![0-9]+]] +// CHECK-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META132:![0-9]+]] +// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] +// CHECK-GLOBAL: for.cond.i.i.i: +// CHECK-GLOBAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] +// CHECK-GLOBAL-NEXT: [[CMP_I_I_I:%.*]] = icmp 
samesign ult i32 [[I_0_I_I_I]], 11 +// CHECK-GLOBAL-NEXT: br i1 [[CMP_I_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1IILM11ENS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEESS__EXIT:%.*]] +// CHECK-GLOBAL: for.body.i.i.i: +// CHECK-GLOBAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 +// CHECK-GLOBAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul i32 [[TMP3]], [[I_0_I_I_I]] +// CHECK-GLOBAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[TMP2]], [[MUL_I_I_I_I]] +// CHECK-GLOBAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 +// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[P:%.*]], i64 [[IDXPROM_I_I_I]] +// CHECK-GLOBAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] +// CHECK-GLOBAL-NEXT: store i32 [[TMP4]], ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-GLOBAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 +// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP135:![0-9]+]] // CHECK-GLOBAL: 
_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1iiLm11ENS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_SP_NS0_4spanISQ_XT2_EEESS_.exit: // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // CHECK-GLOBAL-NEXT: ret void @@ -1213,22 +1233,22 @@ SYCL_EXTERNAL void test_sixteen_ints(sycl::sub_group &sg, plain_ptr p, // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR6]] // CHECK-LOCAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META143:![0-9]+]] // CHECK-LOCAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META146:![0-9]+]] -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I:%.*]] -// CHECK-LOCAL: for.cond.i.i: -// CHECK-LOCAL-NEXT: [[I_0_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] -// CHECK-LOCAL-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I]], 11 -// CHECK-LOCAL-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label 
[[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS3IILM11ENS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEESS__EXIT:%.*]] -// CHECK-LOCAL: for.body.i.i: -// CHECK-LOCAL-NEXT: [[CONV_I_I:%.*]] = zext nneg i32 [[I_0_I_I]] to i64 -// CHECK-LOCAL-NEXT: [[MUL_I_I_I:%.*]] = mul i32 [[TMP3]], [[I_0_I_I]] -// CHECK-LOCAL-NEXT: [[ADD_I_I_I:%.*]] = add i32 [[TMP2]], [[MUL_I_I_I]] -// CHECK-LOCAL-NEXT: [[IDXPROM_I_I:%.*]] = sext i32 [[ADD_I_I_I]] to i64 -// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[P:%.*]], i64 [[IDXPROM_I_I]] -// CHECK-LOCAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(3) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I]] -// CHECK-LOCAL-NEXT: store i32 [[TMP4]], ptr addrspace(4) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-LOCAL-NEXT: [[INC_I_I]] = add nuw nsw i32 [[I_0_I_I]], 1 -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP149:![0-9]+]] +// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] +// CHECK-LOCAL: for.cond.i.i.i: +// CHECK-LOCAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] +// CHECK-LOCAL-NEXT: [[CMP_I_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I_I]], 11 +// CHECK-LOCAL-NEXT: br i1 [[CMP_I_I_I]], label [[FOR_BODY_I_I_I]], label 
[[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS3IILM11ENS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_SP_NS0_4SPANISQ_XT2_EEESS__EXIT:%.*]] +// CHECK-LOCAL: for.body.i.i.i: +// CHECK-LOCAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 +// CHECK-LOCAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul i32 [[TMP3]], [[I_0_I_I_I]] +// CHECK-LOCAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[TMP2]], [[MUL_I_I_I_I]] +// CHECK-LOCAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 +// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[P:%.*]], i64 [[IDXPROM_I_I_I]] +// CHECK-LOCAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(3) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] +// CHECK-LOCAL-NEXT: store i32 [[TMP4]], ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-LOCAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 +// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP149:![0-9]+]] // CHECK-LOCAL: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS3iiLm11ENS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_SP_NS0_4spanISQ_XT2_EEESS_.exit: // CHECK-LOCAL-NEXT: tail call spir_func void 
@_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR6]] // CHECK-LOCAL-NEXT: ret void diff --git a/sycl/test/check_device_code/group_load_store_alignment.cpp b/sycl/test/check_device_code/group_load_store_alignment.cpp new file mode 100644 index 0000000000000..399fb58fe0d50 --- /dev/null +++ b/sycl/test/check_device_code/group_load_store_alignment.cpp @@ -0,0 +1,105 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clangxx -O3 -fsycl-device-only -fno-discard-value-names -S -emit-llvm -fno-sycl-instrument-device-code -o - %s | FileCheck --check-prefix CHECK-GLOBAL %s + +// REQUIRES: linux +// Test checks that when alignment property is provided with alignment value +// which meets the requirement then there is no dynamic alignment check. + +#include + +using namespace sycl; +using namespace sycl::ext::oneapi::experimental; + +using opt_blocked = + decltype(properties(full_group, contiguous_memory, data_placement_blocked)); + +using opt_blocked_aligned = decltype(properties( + full_group, contiguous_memory, data_placement_blocked, alignment<16>)); + +template +using plain_ptr = typename sycl::detail::DecoratedType< + T, access::address_space::global_space>::type *; + +// CHECK-GLOBAL-LABEL: @_Z32test_load_without_alignment_hintRN4sycl3_V19sub_groupEPU3AS1sRs( +// CHECK-GLOBAL-NEXT: entry: +// CHECK-GLOBAL-NEXT: [[CMP_I_I_I:%.*]] = icmp ne ptr addrspace(1) [[P:%.*]], null +// CHECK-GLOBAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I_I]]) +// CHECK-GLOBAL-NEXT: [[TMP0:%.*]] = ptrtoint ptr addrspace(1) [[P]] to i64 +// CHECK-GLOBAL-NEXT: [[REM_I_I_I:%.*]] = and i64 [[TMP0]], 3 +// CHECK-GLOBAL-NEXT: [[CMP1_I_NOT_I_I:%.*]] = icmp eq i64 [[REM_I_I_I]], 0 +// CHECK-GLOBAL-NEXT: br i1 [[CMP1_I_NOT_I_I]], label [[IF_END_I_I:%.*]], label [[IF_THEN_I_I:%.*]] +// CHECK-GLOBAL: if.then.i.i: +// CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 
noundef 912) #[[ATTR3:[0-9]+]] +// CHECK-GLOBAL-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4 +// CHECK-GLOBAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[TMP1]] to i64 +// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[P]], i64 [[IDXPROM_I_I_I]] +// CHECK-GLOBAL-NEXT: [[TMP2:%.*]] = load i16, ptr addrspace(1) [[ARRAYIDX_I_I_I]], align 2, !tbaa [[TBAA8:![0-9]+]] +// CHECK-GLOBAL-NEXT: store i16 [[TMP2]], ptr addrspace(4) [[OUT:%.*]], align 2, !tbaa [[TBAA8]] +// CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR3]] +// CHECK-GLOBAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1SSNS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT2_EEVE4TYPEESP_SN_RSO_SQ__EXIT:%.*]] +// CHECK-GLOBAL: if.end.i.i: +// CHECK-GLOBAL-NEXT: [[CALL4_I_I:%.*]] = tail call spir_func noundef zeroext i16 @_Z30__spirv_SubgroupBlockReadINTELItET_PU3AS1Kt(ptr addrspace(1) noundef nonnull [[P]]) #[[ATTR3]] +// CHECK-GLOBAL-NEXT: store i16 [[CALL4_I_I]], ptr addrspace(4) [[OUT]], align 2 +// CHECK-GLOBAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS1SSNS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT2_EEVE4TYPEESP_SN_RSO_SQ__EXIT]] +// CHECK-GLOBAL: 
_ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS1ssNS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT2_EEvE4typeESP_SN_RSO_SQ_.exit: +// CHECK-GLOBAL-NEXT: ret void +// +SYCL_EXTERNAL void test_load_without_alignment_hint(sycl::sub_group &sg, + plain_ptr p, + short &out) { + group_load(sg, p, out, opt_blocked{}); +} + +// +// CHECK-GLOBAL-LABEL: @_Z29test_load_with_alignment_hintRN4sycl3_V19sub_groupEPU3AS1sRs( +// CHECK-GLOBAL-NEXT: entry: +// CHECK-GLOBAL-NEXT: [[CMP_I_I_I:%.*]] = icmp ne ptr addrspace(1) [[P:%.*]], null +// CHECK-GLOBAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I_I]]) +// CHECK-GLOBAL-NEXT: [[CALL1_I_I:%.*]] = tail call spir_func noundef zeroext i16 @_Z30__spirv_SubgroupBlockReadINTELItET_PU3AS1Kt(ptr addrspace(1) noundef nonnull [[P]]) #[[ATTR3]] +// CHECK-GLOBAL-NEXT: store i16 [[CALL1_I_I]], ptr addrspace(4) [[OUT:%.*]], align 2 +// CHECK-GLOBAL-NEXT: ret void +// +SYCL_EXTERNAL void test_load_with_alignment_hint(sycl::sub_group &sg, + plain_ptr p, + short &out) { + group_load(sg, p, out, opt_blocked_aligned{}); +} + +// CHECK-GLOBAL-LABEL: @_Z33test_store_without_alignment_hintRN4sycl3_V19sub_groupEiPU3AS1i( +// CHECK-GLOBAL-NEXT: entry: +// CHECK-GLOBAL-NEXT: [[CMP_I_I_I:%.*]] = icmp ne ptr addrspace(1) [[P:%.*]], null +// CHECK-GLOBAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I_I]]) +// CHECK-GLOBAL-NEXT: [[TMP0:%.*]] = ptrtoint ptr addrspace(1) [[P]] to i64 +// CHECK-GLOBAL-NEXT: [[REM_I_I_I:%.*]] = and i64 [[TMP0]], 15 +// CHECK-GLOBAL-NEXT: [[CMP1_I_NOT_I_I:%.*]] = icmp eq i64 [[REM_I_I_I]], 0 +// CHECK-GLOBAL-NEXT: br i1 [[CMP1_I_NOT_I_I]], label [[IF_END_I_I:%.*]], label [[IF_THEN_I_I:%.*]] +// CHECK-GLOBAL: if.then.i.i: +// CHECK-GLOBAL-NEXT: 
tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR3]] +// CHECK-GLOBAL-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4 +// CHECK-GLOBAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[TMP1]] to i64 +// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[P]], i64 [[IDXPROM_I_I_I]] +// CHECK-GLOBAL-NEXT: store i32 [[V:%.*]], ptr addrspace(1) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA14:![0-9]+]] +// CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR3]] +// CHECK-GLOBAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEIPU3AS1INS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT2_EEVE4TYPEESP_RKSN_SO_SQ__EXIT:%.*]] +// CHECK-GLOBAL: if.end.i.i: +// CHECK-GLOBAL-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELIjEvPU3AS1jT_(ptr addrspace(1) noundef nonnull [[P]], i32 noundef [[V]]) #[[ATTR3]] +// CHECK-GLOBAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEIPU3AS1INS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT2_EEVE4TYPEESP_RKSN_SO_SQ__EXIT]] +// CHECK-GLOBAL: 
_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiPU3AS1iNS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT2_EEvE4typeESP_RKSN_SO_SQ_.exit: +// CHECK-GLOBAL-NEXT: ret void +// +SYCL_EXTERNAL void test_store_without_alignment_hint(sycl::sub_group &sg, int v, + plain_ptr p) { + group_store(sg, v, p, opt_blocked{}); +} + +// CHECK-GLOBAL-LABEL: @_Z30test_store_with_alignment_hintRN4sycl3_V19sub_groupEiPU3AS1i( +// CHECK-GLOBAL-NEXT: entry: +// CHECK-GLOBAL-NEXT: [[CMP_I_I_I:%.*]] = icmp ne ptr addrspace(1) [[P:%.*]], null +// CHECK-GLOBAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I_I]]) +// CHECK-GLOBAL-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELIjEvPU3AS1jT_(ptr addrspace(1) noundef nonnull [[P]], i32 noundef [[V:%.*]]) #[[ATTR3]] +// CHECK-GLOBAL-NEXT: ret void +// +SYCL_EXTERNAL void test_store_with_alignment_hint(sycl::sub_group &sg, int v, + plain_ptr p) { + group_store(sg, v, p, opt_blocked_aligned{}); +} diff --git a/sycl/test/check_device_code/group_load_store_native_key.cpp b/sycl/test/check_device_code/group_load_store_native_key.cpp index 246b25187ab33..88b10bd2cfdd5 100644 --- a/sycl/test/check_device_code/group_load_store_native_key.cpp +++ b/sycl/test/check_device_code/group_load_store_native_key.cpp @@ -34,8 +34,8 @@ using plain_ptr = typename sycl::detail::DecoratedType< // CHECK-GLOBAL-NEXT: entry: // CHECK-GLOBAL-NEXT: [[CMP_I_I_I:%.*]] = icmp ne ptr addrspace(1) [[P:%.*]], null // CHECK-GLOBAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I_I]]) -// CHECK-GLOBAL-NEXT: [[CALL5_I_I:%.*]] = tail call spir_func noundef i32 @_Z30__spirv_SubgroupBlockReadINTELIjET_PU3AS1Kj(ptr addrspace(1) noundef nonnull [[P]]) #[[ATTR3:[0-9]+]] -// 
CHECK-GLOBAL-NEXT: store i32 [[CALL5_I_I]], ptr addrspace(4) [[OUT:%.*]], align 4 +// CHECK-GLOBAL-NEXT: [[CALL6_I_I:%.*]] = tail call spir_func noundef i32 @_Z30__spirv_SubgroupBlockReadINTELIjET_PU3AS1Kj(ptr addrspace(1) noundef nonnull [[P]]) #[[ATTR3:[0-9]+]] +// CHECK-GLOBAL-NEXT: store i32 [[CALL6_I_I]], ptr addrspace(4) [[OUT:%.*]], align 4 // CHECK-GLOBAL-NEXT: ret void // // CHECK-LOCAL-LABEL: @_Z9test_loadRN4sycl3_V19sub_groupEPU3AS3iRi( @@ -59,8 +59,8 @@ SYCL_EXTERNAL void test_load(sycl::sub_group &sg, plain_ptr p, int &out) { // CHECK-GLOBAL-NEXT: entry: // CHECK-GLOBAL-NEXT: [[CMP_I_I_I:%.*]] = icmp ne ptr addrspace(1) [[P:%.*]], null // CHECK-GLOBAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I_I]]) -// CHECK-GLOBAL-NEXT: [[CALL5_I_I:%.*]] = tail call spir_func noundef i32 @_Z30__spirv_SubgroupBlockReadINTELIjET_PU3AS1Kj(ptr addrspace(1) noundef nonnull [[P]]) #[[ATTR3]] -// CHECK-GLOBAL-NEXT: store i32 [[CALL5_I_I]], ptr addrspace(4) [[OUT:%.*]], align 4 +// CHECK-GLOBAL-NEXT: [[CALL6_I_I:%.*]] = tail call spir_func noundef i32 @_Z30__spirv_SubgroupBlockReadINTELIjET_PU3AS1Kj(ptr addrspace(1) noundef nonnull [[P]]) #[[ATTR3]] +// CHECK-GLOBAL-NEXT: store i32 [[CALL6_I_I]], ptr addrspace(4) [[OUT:%.*]], align 4 // CHECK-GLOBAL-NEXT: ret void // // CHECK-LOCAL-LABEL: @_Z16test_load_nativeRN4sycl3_V19sub_groupEPU3AS3iRi( @@ -69,8 +69,8 @@ SYCL_EXTERNAL void test_load(sycl::sub_group &sg, plain_ptr p, int &out) { // CHECK-LOCAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I_I]]) // CHECK-LOCAL-NEXT: [[TMP0:%.*]] = ptrtoint ptr addrspace(3) [[P]] to i64 // CHECK-LOCAL-NEXT: [[REM_I_I_I:%.*]] = and i64 [[TMP0]], 15 -// CHECK-LOCAL-NEXT: [[CMP_I20_I_I:%.*]] = icmp eq i64 [[REM_I_I_I]], 0 -// CHECK-LOCAL-NEXT: br i1 [[CMP_I20_I_I]], label [[IF_END_I_I:%.*]], label [[IF_THEN_I_I:%.*]] +// CHECK-LOCAL-NEXT: [[CMP_I15_I_I:%.*]] = icmp eq i64 [[REM_I_I_I]], 0 +// CHECK-LOCAL-NEXT: br i1 [[CMP_I15_I_I]], label [[IF_END_I_I:%.*]], label [[IF_THEN_I_I:%.*]] // 
CHECK-LOCAL: if.then.i.i: // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR3]] // CHECK-LOCAL-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4 @@ -81,8 +81,8 @@ SYCL_EXTERNAL void test_load(sycl::sub_group &sg, plain_ptr p, int &out) { // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR3]] // CHECK-LOCAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS3IINS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT2_EEVE4TYPEESR_SP_RSQ_SS__EXIT:%.*]] // CHECK-LOCAL: if.end.i.i: -// CHECK-LOCAL-NEXT: [[CALL5_I_I:%.*]] = tail call spir_func noundef i32 @_Z30__spirv_SubgroupBlockReadINTELIjET_PU3AS3Kj(ptr addrspace(3) noundef nonnull [[P]]) #[[ATTR3]] -// CHECK-LOCAL-NEXT: store i32 [[CALL5_I_I]], ptr addrspace(4) [[OUT]], align 4 +// CHECK-LOCAL-NEXT: [[CALL6_I_I:%.*]] = tail call spir_func noundef i32 @_Z30__spirv_SubgroupBlockReadINTELIjET_PU3AS3Kj(ptr addrspace(3) noundef nonnull [[P]]) #[[ATTR3]] +// CHECK-LOCAL-NEXT: store i32 [[CALL6_I_I]], ptr addrspace(4) [[OUT]], align 4 // CHECK-LOCAL-NEXT: br label 
[[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL10GROUP_LOADINS0_9SUB_GROUPEPU3AS3IINS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE17VERIFY_LOAD_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT2_EEVE4TYPEESR_SP_RSQ_SS__EXIT]] // CHECK-LOCAL: _ZN4sycl3_V13ext6oneapi12experimental10group_loadINS0_9sub_groupEPU3AS3iiNS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE17verify_load_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT2_EEvE4typeESR_SP_RSQ_SS_.exit: // CHECK-LOCAL-NEXT: ret void @@ -98,8 +98,8 @@ SYCL_EXTERNAL void test_load_native(sycl::sub_group &sg, plain_ptr p, // CHECK-GLOBAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I_I]]) // CHECK-GLOBAL-NEXT: [[TMP0:%.*]] = ptrtoint ptr addrspace(1) [[P]] to i64 // CHECK-GLOBAL-NEXT: [[REM_I_I_I:%.*]] = and i64 [[TMP0]], 15 -// CHECK-GLOBAL-NEXT: [[CMP_I23_I_I:%.*]] = icmp eq i64 [[REM_I_I_I]], 0 -// CHECK-GLOBAL-NEXT: br i1 [[CMP_I23_I_I]], label [[IF_END_I_I:%.*]], label [[IF_THEN_I_I:%.*]] +// CHECK-GLOBAL-NEXT: [[CMP_I25_I_I:%.*]] = icmp eq i64 [[REM_I_I_I]], 0 +// CHECK-GLOBAL-NEXT: br i1 [[CMP_I25_I_I]], label [[IF_END_I_I:%.*]], label [[IF_THEN_I_I:%.*]] // CHECK-GLOBAL: if.then.i.i: // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR3]] // CHECK-GLOBAL-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4 @@ -136,8 +136,8 @@ SYCL_EXTERNAL void test_store(sycl::sub_group &sg, int v, plain_ptr p) { // 
CHECK-GLOBAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I_I]]) // CHECK-GLOBAL-NEXT: [[TMP0:%.*]] = ptrtoint ptr addrspace(1) [[P]] to i64 // CHECK-GLOBAL-NEXT: [[REM_I_I_I:%.*]] = and i64 [[TMP0]], 15 -// CHECK-GLOBAL-NEXT: [[CMP_I23_I_I:%.*]] = icmp eq i64 [[REM_I_I_I]], 0 -// CHECK-GLOBAL-NEXT: br i1 [[CMP_I23_I_I]], label [[IF_END_I_I:%.*]], label [[IF_THEN_I_I:%.*]] +// CHECK-GLOBAL-NEXT: [[CMP_I25_I_I:%.*]] = icmp eq i64 [[REM_I_I_I]], 0 +// CHECK-GLOBAL-NEXT: br i1 [[CMP_I25_I_I]], label [[IF_END_I_I:%.*]], label [[IF_THEN_I_I:%.*]] // CHECK-GLOBAL: if.then.i.i: // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR3]] // CHECK-GLOBAL-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4 @@ -158,8 +158,8 @@ SYCL_EXTERNAL void test_store(sycl::sub_group &sg, int v, plain_ptr p) { // CHECK-LOCAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I_I]]) // CHECK-LOCAL-NEXT: [[TMP0:%.*]] = ptrtoint ptr addrspace(3) [[P]] to i64 // CHECK-LOCAL-NEXT: [[REM_I_I_I:%.*]] = and i64 [[TMP0]], 15 -// CHECK-LOCAL-NEXT: [[CMP_I23_I_I:%.*]] = icmp eq i64 [[REM_I_I_I]], 0 -// CHECK-LOCAL-NEXT: br i1 [[CMP_I23_I_I]], label [[IF_END_I_I:%.*]], label [[IF_THEN_I_I:%.*]] +// CHECK-LOCAL-NEXT: [[CMP_I25_I_I:%.*]] = icmp eq i64 [[REM_I_I_I]], 0 +// CHECK-LOCAL-NEXT: br i1 [[CMP_I25_I_I]], label [[IF_END_I_I:%.*]], label [[IF_THEN_I_I:%.*]] // CHECK-LOCAL: if.then.i.i: // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR3]] // CHECK-LOCAL-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4 diff --git a/sycl/test/check_device_code/group_store.cpp b/sycl/test/check_device_code/group_store.cpp index 7e61b8de4e517..9495c2dd7ab68 100644 --- a/sycl/test/check_device_code/group_store.cpp +++ b/sycl/test/check_device_code/group_store.cpp @@ -228,7 
+228,7 @@ SYCL_EXTERNAL void test_accessor_iter(sycl::sub_group &sg, int v, // CHECK-GLOBAL-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[AGG_TMP3_I]]) // CHECK-GLOBAL-NEXT: call void @llvm.memcpy.p0.p4.i64(ptr noundef nonnull align 8 dereferenceable(80) [[AGG_TMP14]], ptr addrspace(4) noundef align 8 dereferenceable(80) [[ITER:%.*]], i64 80, i1 false) // CHECK-GLOBAL-NEXT: store ptr addrspace(4) [[V_ADDR_ASCAST]], ptr [[AGG_TMP1_I]], align 8, !tbaa [[TBAA21:![0-9]+]] -// CHECK-GLOBAL-NEXT: call spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEKiLm1ENS0_6detail17accessor_iteratorIiLi1EEENS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSD_INS3_21contiguous_memory_keyEJEEENSD_INS3_14full_group_keyEJEEENSD_INSB_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeEST_NS0_4spanISR_XT1_EEESS_SU_(ptr noundef nonnull byval(%"struct.sycl::_V1::sub_group") align 1 [[AGG_TMP_I]], ptr noundef nonnull byval(%"class.sycl::_V1::span") align 8 [[AGG_TMP1_I]], ptr noundef nonnull byval(%"class.sycl::_V1::detail::accessor_iterator") align 8 [[AGG_TMP14]], ptr noundef nonnull byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.7") align 1 [[AGG_TMP3_I]]) #[[ATTR7]] +// CHECK-GLOBAL-NEXT: call spir_func void @_ZN4sycl3_V13ext6oneapi12experimental6detail16group_store_implINS0_9sub_groupEKiLm1ENS0_6detail17accessor_iteratorIiLi1EEENS3_10propertiesINS4_20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSD_INS3_21contiguous_memory_keyEJEEENSD_INS3_14full_group_keyEJEEENSD_INS4_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeEST_NS0_4spanISR_XT1_EEESS_SU_(ptr noundef nonnull 
byval(%"struct.sycl::_V1::sub_group") align 1 [[AGG_TMP_I]], ptr noundef nonnull byval(%"class.sycl::_V1::span") align 8 [[AGG_TMP1_I]], ptr noundef nonnull byval(%"class.sycl::_V1::detail::accessor_iterator") align 8 [[AGG_TMP14]], ptr noundef nonnull byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.7") align 1 [[AGG_TMP3_I]]) #[[ATTR7]] // CHECK-GLOBAL-NEXT: call void @llvm.lifetime.end.p0(i64 80, ptr nonnull [[AGG_TMP14]]) // CHECK-GLOBAL-NEXT: call void @llvm.lifetime.end.p0(i64 1, ptr nonnull [[AGG_TMP_I]]) // CHECK-GLOBAL-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[AGG_TMP1_I]]) @@ -250,7 +250,7 @@ SYCL_EXTERNAL void test_accessor_iter(sycl::sub_group &sg, int v, // CHECK-LOCAL-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[AGG_TMP3_I]]) // CHECK-LOCAL-NEXT: call void @llvm.memcpy.p0.p4.i64(ptr noundef nonnull align 8 dereferenceable(80) [[AGG_TMP14]], ptr addrspace(4) noundef align 8 dereferenceable(80) [[ITER:%.*]], i64 80, i1 false) // CHECK-LOCAL-NEXT: store ptr addrspace(4) [[V_ADDR_ASCAST]], ptr [[AGG_TMP1_I]], align 8, !tbaa [[TBAA21:![0-9]+]] -// CHECK-LOCAL-NEXT: call spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEKiLm1ENS0_6detail17accessor_iteratorIiLi1EEENS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSD_INS3_21contiguous_memory_keyEJEEENSD_INS3_14full_group_keyEJEEENSD_INSB_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeEST_NS0_4spanISR_XT1_EEESS_SU_(ptr noundef nonnull byval(%"struct.sycl::_V1::sub_group") align 1 [[AGG_TMP_I]], ptr noundef nonnull byval(%"class.sycl::_V1::span") align 8 [[AGG_TMP1_I]], ptr noundef nonnull byval(%"class.sycl::_V1::detail::accessor_iterator") align 8 [[AGG_TMP14]], ptr noundef nonnull 
byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.7") align 1 [[AGG_TMP3_I]]) #[[ATTR7]] +// CHECK-LOCAL-NEXT: call spir_func void @_ZN4sycl3_V13ext6oneapi12experimental6detail16group_store_implINS0_9sub_groupEKiLm1ENS0_6detail17accessor_iteratorIiLi1EEENS3_10propertiesINS4_20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSD_INS3_21contiguous_memory_keyEJEEENSD_INS3_14full_group_keyEJEEENSD_INS4_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeEST_NS0_4spanISR_XT1_EEESS_SU_(ptr noundef nonnull byval(%"struct.sycl::_V1::sub_group") align 1 [[AGG_TMP_I]], ptr noundef nonnull byval(%"class.sycl::_V1::span") align 8 [[AGG_TMP1_I]], ptr noundef nonnull byval(%"class.sycl::_V1::detail::accessor_iterator") align 8 [[AGG_TMP14]], ptr noundef nonnull byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.7") align 1 [[AGG_TMP3_I]]) #[[ATTR7]] // CHECK-LOCAL-NEXT: call void @llvm.lifetime.end.p0(i64 80, ptr nonnull [[AGG_TMP14]]) // CHECK-LOCAL-NEXT: call void @llvm.lifetime.end.p0(i64 1, ptr nonnull [[AGG_TMP_I]]) // CHECK-LOCAL-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[AGG_TMP1_I]]) @@ -266,113 +266,113 @@ SYCL_EXTERNAL void test_accessor_iter_force_optimized(sycl::sub_group &sg, // CHECK-GLOBAL-LABEL: @_ZN7blocked16test_four_shortsERN4sycl3_V19sub_groupENS1_4spanIsLm4EEEPU3AS1s( // CHECK-GLOBAL-NEXT: entry: -// CHECK-GLOBAL-NEXT: [[VALUES_I:%.*]] = alloca [4 x i16], align 2 +// CHECK-GLOBAL-NEXT: [[VALUES_I_I:%.*]] = alloca [4 x i16], align 2 // CHECK-GLOBAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[V:%.*]], align 8, !tbaa [[TBAA25:![0-9]+]] // CHECK-GLOBAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) -// CHECK-GLOBAL-NEXT: [[CMP_I_I:%.*]] = icmp ne ptr addrspace(1) [[P:%.*]], null -// CHECK-GLOBAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I]]) +// 
CHECK-GLOBAL-NEXT: [[CMP_I_I_I:%.*]] = icmp ne ptr addrspace(1) [[P:%.*]], null +// CHECK-GLOBAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I_I]]) // CHECK-GLOBAL-NEXT: [[TMP2:%.*]] = ptrtoint ptr addrspace(1) [[P]] to i64 -// CHECK-GLOBAL-NEXT: [[REM_I_I:%.*]] = and i64 [[TMP2]], 15 -// CHECK-GLOBAL-NEXT: [[CMP1_I_NOT_I:%.*]] = icmp eq i64 [[REM_I_I]], 0 -// CHECK-GLOBAL-NEXT: br i1 [[CMP1_I_NOT_I]], label [[IF_END_I:%.*]], label [[IF_THEN_I:%.*]] -// CHECK-GLOBAL: if.then.i: +// CHECK-GLOBAL-NEXT: [[REM_I_I_I:%.*]] = and i64 [[TMP2]], 15 +// CHECK-GLOBAL-NEXT: [[CMP1_I_NOT_I_I:%.*]] = icmp eq i64 [[REM_I_I_I]], 0 +// CHECK-GLOBAL-NEXT: br i1 [[CMP1_I_NOT_I_I]], label [[IF_END_I_I:%.*]], label [[IF_THEN_I_I:%.*]] +// CHECK-GLOBAL: if.then.i.i: // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // CHECK-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META27:![0-9]+]] -// CHECK-GLOBAL-NEXT: [[MUL_I_I_I:%.*]] = shl i32 [[TMP3]], 2 +// CHECK-GLOBAL-NEXT: [[MUL_I_I_I_I:%.*]] = shl i32 [[TMP3]], 2 +// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] +// CHECK-GLOBAL: for.cond.i.i.i: +// CHECK-GLOBAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[IF_THEN_I_I]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] +// CHECK-GLOBAL-NEXT: [[CMP_I20_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I_I]], 4 +// CHECK-GLOBAL-NEXT: br i1 [[CMP_I20_I_I]], label [[FOR_BODY_I_I_I]], label 
[[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL6DETAIL16GROUP_STORE_IMPLINS0_9SUB_GROUPESLM4EPU3AS1SNS3_10PROPERTIESINS4_20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS4_9NAIVE_KEYEJEEENSB_INS4_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEEST_NS0_4SPANISR_XT1_EEESS_SU__EXIT_I_I:%.*]] +// CHECK-GLOBAL: for.body.i.i.i: +// CHECK-GLOBAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 +// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i16, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] +// CHECK-GLOBAL-NEXT: [[TMP4:%.*]] = load i16, ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 2, !tbaa [[TBAA30:![0-9]+]] +// CHECK-GLOBAL-NEXT: [[ADD_I_I_I_I:%.*]] = or disjoint i32 [[MUL_I_I_I_I]], [[I_0_I_I_I]] +// CHECK-GLOBAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 +// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[P]], i64 [[IDXPROM_I_I_I]] +// CHECK-GLOBAL-NEXT: store i16 [[TMP4]], ptr addrspace(1) [[ARRAYIDX_I_I_I]], align 2, !tbaa [[TBAA30]] +// CHECK-GLOBAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 +// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP32:![0-9]+]] +// CHECK-GLOBAL: _ZN4sycl3_V13ext6oneapi12experimental6detail16group_store_implINS0_9sub_groupEsLm4EPU3AS1sNS3_10propertiesINS4_20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS4_9naive_keyEJEEENSB_INS4_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeEST_NS0_4spanISR_XT1_EEESS_SU_.exit.i.i: +// CHECK-GLOBAL-NEXT: 
tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] +// CHECK-GLOBAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPESLM4EPU3AS1SNS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_SS__EXIT:%.*]] +// CHECK-GLOBAL: if.end.i.i: +// CHECK-GLOBAL-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[VALUES_I_I]]) #[[ATTR9:[0-9]+]] // CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I:%.*]] // CHECK-GLOBAL: for.cond.i.i: -// CHECK-GLOBAL-NEXT: [[I_0_I_I:%.*]] = phi i32 [ 0, [[IF_THEN_I]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] -// CHECK-GLOBAL-NEXT: [[CMP_I19_I:%.*]] = icmp samesign ult i32 [[I_0_I_I]], 4 -// CHECK-GLOBAL-NEXT: br i1 [[CMP_I19_I]], label [[FOR_BODY_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPESLM4EPU3AS1SNS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_9NAIVE_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEEST_NS0_4SPANISR_XT1_EEESS_SU__EXIT_I:%.*]] +// CHECK-GLOBAL-NEXT: [[I_0_I_I:%.*]] = phi i32 [ 0, [[IF_END_I_I]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] +// CHECK-GLOBAL-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I]], 4 +// CHECK-GLOBAL-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[FOR_COND_CLEANUP_I_I:%.*]] +// CHECK-GLOBAL: for.cond.cleanup.i.i: +// CHECK-GLOBAL-NEXT: 
[[TMP5:%.*]] = load i64, ptr [[VALUES_I_I]], align 2, !tbaa [[TBAA34:![0-9]+]] +// CHECK-GLOBAL-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELImEvPU3AS1mT_(ptr addrspace(1) noundef nonnull [[P]], i64 noundef [[TMP5]]) #[[ATTR7]] +// CHECK-GLOBAL-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VALUES_I_I]]) #[[ATTR9]] +// CHECK-GLOBAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPESLM4EPU3AS1SNS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_SS__EXIT]] // CHECK-GLOBAL: for.body.i.i: // CHECK-GLOBAL-NEXT: [[CONV_I_I:%.*]] = zext nneg i32 [[I_0_I_I]] to i64 -// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds nuw i16, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I]] -// CHECK-GLOBAL-NEXT: [[TMP4:%.*]] = load i16, ptr addrspace(4) [[ARRAYIDX_I_I_I]], align 2, !tbaa [[TBAA30:![0-9]+]] -// CHECK-GLOBAL-NEXT: [[ADD_I_I_I:%.*]] = or disjoint i32 [[MUL_I_I_I]], [[I_0_I_I]] -// CHECK-GLOBAL-NEXT: [[IDXPROM_I_I:%.*]] = sext i32 [[ADD_I_I_I]] to i64 -// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[P]], i64 [[IDXPROM_I_I]] -// CHECK-GLOBAL-NEXT: store i16 [[TMP4]], ptr addrspace(1) [[ARRAYIDX_I_I]], align 2, !tbaa [[TBAA30]] +// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I21_I_I:%.*]] = getelementptr inbounds nuw i16, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I]] +// CHECK-GLOBAL-NEXT: [[TMP6:%.*]] = load i16, ptr addrspace(4) [[ARRAYIDX_I21_I_I]], align 2, !tbaa [[TBAA30]] +// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds [4 x i16], ptr [[VALUES_I_I]], i64 0, i64 [[CONV_I_I]] +// CHECK-GLOBAL-NEXT: store i16 
[[TMP6]], ptr [[ARRAYIDX_I_I]], align 2, !tbaa [[TBAA30]] // CHECK-GLOBAL-NEXT: [[INC_I_I]] = add nuw nsw i32 [[I_0_I_I]], 1 -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP32:![0-9]+]] -// CHECK-GLOBAL: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEsLm4EPU3AS1sNS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_9naive_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeEST_NS0_4spanISR_XT1_EEESS_SU_.exit.i: -// CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] -// CHECK-GLOBAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPESLM4EPU3AS1SNS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_SS__EXIT:%.*]] -// CHECK-GLOBAL: if.end.i: -// CHECK-GLOBAL-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[VALUES_I]]) #[[ATTR9:[0-9]+]] -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I:%.*]] -// CHECK-GLOBAL: for.cond.i: -// CHECK-GLOBAL-NEXT: [[I_0_I:%.*]] = phi i32 [ 0, [[IF_END_I]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] -// CHECK-GLOBAL-NEXT: [[CMP_I:%.*]] = icmp samesign ult i32 [[I_0_I]], 4 -// CHECK-GLOBAL-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[FOR_COND_CLEANUP_I:%.*]] -// CHECK-GLOBAL: for.cond.cleanup.i: -// CHECK-GLOBAL-NEXT: [[TMP5:%.*]] = load i64, ptr [[VALUES_I]], align 2, 
!tbaa [[TBAA34:![0-9]+]] -// CHECK-GLOBAL-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELImEvPU3AS1mT_(ptr addrspace(1) noundef nonnull [[P]], i64 noundef [[TMP5]]) #[[ATTR7]] -// CHECK-GLOBAL-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VALUES_I]]) #[[ATTR9]] -// CHECK-GLOBAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPESLM4EPU3AS1SNS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_SS__EXIT]] -// CHECK-GLOBAL: for.body.i: -// CHECK-GLOBAL-NEXT: [[CONV_I:%.*]] = zext nneg i32 [[I_0_I]] to i64 -// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I20_I:%.*]] = getelementptr inbounds nuw i16, ptr addrspace(4) [[TMP1]], i64 [[CONV_I]] -// CHECK-GLOBAL-NEXT: [[TMP6:%.*]] = load i16, ptr addrspace(4) [[ARRAYIDX_I20_I]], align 2, !tbaa [[TBAA30]] -// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x i16], ptr [[VALUES_I]], i64 0, i64 [[CONV_I]] -// CHECK-GLOBAL-NEXT: store i16 [[TMP6]], ptr [[ARRAYIDX_I]], align 2, !tbaa [[TBAA30]] -// CHECK-GLOBAL-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1 -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP35:![0-9]+]] +// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK-GLOBAL: 
_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEsLm4EPU3AS1sNS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_NS0_4spanISP_XT1_EEESQ_SS_.exit: // CHECK-GLOBAL-NEXT: ret void // // CHECK-LOCAL-LABEL: @_ZN7blocked16test_four_shortsERN4sycl3_V19sub_groupENS1_4spanIsLm4EEEPU3AS3s( // CHECK-LOCAL-NEXT: entry: -// CHECK-LOCAL-NEXT: [[VALUES_I:%.*]] = alloca [4 x i16], align 2 +// CHECK-LOCAL-NEXT: [[VALUES_I_I:%.*]] = alloca [4 x i16], align 2 // CHECK-LOCAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[V:%.*]], align 8, !tbaa [[TBAA25:![0-9]+]] // CHECK-LOCAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) -// CHECK-LOCAL-NEXT: [[CMP_I_I:%.*]] = icmp ne ptr addrspace(3) [[P:%.*]], null -// CHECK-LOCAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I]]) +// CHECK-LOCAL-NEXT: [[CMP_I_I_I:%.*]] = icmp ne ptr addrspace(3) [[P:%.*]], null +// CHECK-LOCAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I_I]]) // CHECK-LOCAL-NEXT: [[TMP2:%.*]] = ptrtoint ptr addrspace(3) [[P]] to i64 -// CHECK-LOCAL-NEXT: [[REM_I_I:%.*]] = and i64 [[TMP2]], 15 -// CHECK-LOCAL-NEXT: [[CMP1_I_NOT_I:%.*]] = icmp eq i64 [[REM_I_I]], 0 -// CHECK-LOCAL-NEXT: br i1 [[CMP1_I_NOT_I]], label [[IF_END_I:%.*]], label [[IF_THEN_I:%.*]] -// CHECK-LOCAL: if.then.i: +// CHECK-LOCAL-NEXT: [[REM_I_I_I:%.*]] = and i64 [[TMP2]], 15 +// CHECK-LOCAL-NEXT: [[CMP1_I_NOT_I_I:%.*]] = icmp eq i64 [[REM_I_I_I]], 0 +// CHECK-LOCAL-NEXT: br i1 [[CMP1_I_NOT_I_I]], label [[IF_END_I_I:%.*]], label [[IF_THEN_I_I:%.*]] +// CHECK-LOCAL: if.then.i.i: // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // 
CHECK-LOCAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META27:![0-9]+]] -// CHECK-LOCAL-NEXT: [[MUL_I_I_I:%.*]] = shl i32 [[TMP3]], 2 +// CHECK-LOCAL-NEXT: [[MUL_I_I_I_I:%.*]] = shl i32 [[TMP3]], 2 +// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] +// CHECK-LOCAL: for.cond.i.i.i: +// CHECK-LOCAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[IF_THEN_I_I]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] +// CHECK-LOCAL-NEXT: [[CMP_I20_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I_I]], 4 +// CHECK-LOCAL-NEXT: br i1 [[CMP_I20_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL6DETAIL16GROUP_STORE_IMPLINS0_9SUB_GROUPESLM4EPU3AS3SNS3_10PROPERTIESINS4_20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS4_9NAIVE_KEYEJEEENSB_INS4_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEEST_NS0_4SPANISR_XT1_EEESS_SU__EXIT_I_I:%.*]] +// CHECK-LOCAL: for.body.i.i.i: +// CHECK-LOCAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 +// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i16, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] +// CHECK-LOCAL-NEXT: [[TMP4:%.*]] = load i16, ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 2, !tbaa [[TBAA30:![0-9]+]] +// CHECK-LOCAL-NEXT: [[ADD_I_I_I_I:%.*]] = or disjoint i32 [[MUL_I_I_I_I]], [[I_0_I_I_I]] +// CHECK-LOCAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 +// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i16, ptr addrspace(3) [[P]], i64 [[IDXPROM_I_I_I]] +// CHECK-LOCAL-NEXT: store i16 [[TMP4]], ptr addrspace(3) [[ARRAYIDX_I_I_I]], align 2, !tbaa [[TBAA30]] +// CHECK-LOCAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 
+// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP32:![0-9]+]] +// CHECK-LOCAL: _ZN4sycl3_V13ext6oneapi12experimental6detail16group_store_implINS0_9sub_groupEsLm4EPU3AS3sNS3_10propertiesINS4_20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS4_9naive_keyEJEEENSB_INS4_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeEST_NS0_4spanISR_XT1_EEESS_SU_.exit.i.i: +// CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] +// CHECK-LOCAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPESLM4EPU3AS3SNS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_SS__EXIT:%.*]] +// CHECK-LOCAL: if.end.i.i: +// CHECK-LOCAL-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[VALUES_I_I]]) #[[ATTR9:[0-9]+]] // CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I:%.*]] // CHECK-LOCAL: for.cond.i.i: -// CHECK-LOCAL-NEXT: [[I_0_I_I:%.*]] = phi i32 [ 0, [[IF_THEN_I]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] -// CHECK-LOCAL-NEXT: [[CMP_I19_I:%.*]] = icmp samesign ult i32 [[I_0_I_I]], 4 -// CHECK-LOCAL-NEXT: br i1 [[CMP_I19_I]], label [[FOR_BODY_I_I]], label 
[[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPESLM4EPU3AS3SNS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_9NAIVE_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEEST_NS0_4SPANISR_XT1_EEESS_SU__EXIT_I:%.*]] +// CHECK-LOCAL-NEXT: [[I_0_I_I:%.*]] = phi i32 [ 0, [[IF_END_I_I]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] +// CHECK-LOCAL-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I]], 4 +// CHECK-LOCAL-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[FOR_COND_CLEANUP_I_I:%.*]] +// CHECK-LOCAL: for.cond.cleanup.i.i: +// CHECK-LOCAL-NEXT: [[TMP5:%.*]] = load i64, ptr [[VALUES_I_I]], align 2, !tbaa [[TBAA34:![0-9]+]] +// CHECK-LOCAL-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELImEvPU3AS3mT_(ptr addrspace(3) noundef nonnull [[P]], i64 noundef [[TMP5]]) #[[ATTR7]] +// CHECK-LOCAL-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VALUES_I_I]]) #[[ATTR9]] +// CHECK-LOCAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPESLM4EPU3AS3SNS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_SS__EXIT]] // CHECK-LOCAL: for.body.i.i: // CHECK-LOCAL-NEXT: [[CONV_I_I:%.*]] = zext nneg i32 [[I_0_I_I]] to i64 -// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds nuw i16, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I]] -// CHECK-LOCAL-NEXT: [[TMP4:%.*]] = 
load i16, ptr addrspace(4) [[ARRAYIDX_I_I_I]], align 2, !tbaa [[TBAA30:![0-9]+]] -// CHECK-LOCAL-NEXT: [[ADD_I_I_I:%.*]] = or disjoint i32 [[MUL_I_I_I]], [[I_0_I_I]] -// CHECK-LOCAL-NEXT: [[IDXPROM_I_I:%.*]] = sext i32 [[ADD_I_I_I]] to i64 -// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i16, ptr addrspace(3) [[P]], i64 [[IDXPROM_I_I]] -// CHECK-LOCAL-NEXT: store i16 [[TMP4]], ptr addrspace(3) [[ARRAYIDX_I_I]], align 2, !tbaa [[TBAA30]] +// CHECK-LOCAL-NEXT: [[ARRAYIDX_I21_I_I:%.*]] = getelementptr inbounds nuw i16, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I]] +// CHECK-LOCAL-NEXT: [[TMP6:%.*]] = load i16, ptr addrspace(4) [[ARRAYIDX_I21_I_I]], align 2, !tbaa [[TBAA30]] +// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds [4 x i16], ptr [[VALUES_I_I]], i64 0, i64 [[CONV_I_I]] +// CHECK-LOCAL-NEXT: store i16 [[TMP6]], ptr [[ARRAYIDX_I_I]], align 2, !tbaa [[TBAA30]] // CHECK-LOCAL-NEXT: [[INC_I_I]] = add nuw nsw i32 [[I_0_I_I]], 1 -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP32:![0-9]+]] -// CHECK-LOCAL: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEsLm4EPU3AS3sNS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_9naive_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeEST_NS0_4spanISR_XT1_EEESS_SU_.exit.i: -// CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] -// CHECK-LOCAL-NEXT: br label 
[[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPESLM4EPU3AS3SNS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_SS__EXIT:%.*]] -// CHECK-LOCAL: if.end.i: -// CHECK-LOCAL-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[VALUES_I]]) #[[ATTR9:[0-9]+]] -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I:%.*]] -// CHECK-LOCAL: for.cond.i: -// CHECK-LOCAL-NEXT: [[I_0_I:%.*]] = phi i32 [ 0, [[IF_END_I]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] -// CHECK-LOCAL-NEXT: [[CMP_I:%.*]] = icmp samesign ult i32 [[I_0_I]], 4 -// CHECK-LOCAL-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[FOR_COND_CLEANUP_I:%.*]] -// CHECK-LOCAL: for.cond.cleanup.i: -// CHECK-LOCAL-NEXT: [[TMP5:%.*]] = load i64, ptr [[VALUES_I]], align 2, !tbaa [[TBAA34:![0-9]+]] -// CHECK-LOCAL-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELImEvPU3AS3mT_(ptr addrspace(3) noundef nonnull [[P]], i64 noundef [[TMP5]]) #[[ATTR7]] -// CHECK-LOCAL-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VALUES_I]]) #[[ATTR9]] -// CHECK-LOCAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPESLM4EPU3AS3SNS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_SS__EXIT]] -// CHECK-LOCAL: for.body.i: -// CHECK-LOCAL-NEXT: [[CONV_I:%.*]] = zext nneg i32 [[I_0_I]] to i64 
-// CHECK-LOCAL-NEXT: [[ARRAYIDX_I20_I:%.*]] = getelementptr inbounds nuw i16, ptr addrspace(4) [[TMP1]], i64 [[CONV_I]] -// CHECK-LOCAL-NEXT: [[TMP6:%.*]] = load i16, ptr addrspace(4) [[ARRAYIDX_I20_I]], align 2, !tbaa [[TBAA30]] -// CHECK-LOCAL-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x i16], ptr [[VALUES_I]], i64 0, i64 [[CONV_I]] -// CHECK-LOCAL-NEXT: store i16 [[TMP6]], ptr [[ARRAYIDX_I]], align 2, !tbaa [[TBAA30]] -// CHECK-LOCAL-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1 -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP35:![0-9]+]] +// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK-LOCAL: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEsLm4EPU3AS3sNS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_NS0_4spanISP_XT1_EEESQ_SS_.exit: // CHECK-LOCAL-NEXT: ret void // @@ -385,113 +385,113 @@ SYCL_EXTERNAL void test_four_shorts(sycl::sub_group &sg, span v, // CHECK-GLOBAL-LABEL: @_ZN7blocked22test_four_const_shortsERN4sycl3_V19sub_groupENS1_4spanIKsLm4EEEPU3AS1s( // CHECK-GLOBAL-NEXT: entry: -// CHECK-GLOBAL-NEXT: [[VALUES_I:%.*]] = alloca [4 x i16], align 2 +// CHECK-GLOBAL-NEXT: [[VALUES_I_I:%.*]] = alloca [4 x i16], align 2 // CHECK-GLOBAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[V:%.*]], align 8, !tbaa [[TBAA25]] // CHECK-GLOBAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) -// CHECK-GLOBAL-NEXT: [[CMP_I_I:%.*]] = icmp ne ptr addrspace(1) [[P:%.*]], null -// CHECK-GLOBAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I]]) +// CHECK-GLOBAL-NEXT: [[CMP_I_I_I:%.*]] = icmp ne ptr addrspace(1) [[P:%.*]], null +// CHECK-GLOBAL-NEXT: tail call void 
@llvm.assume(i1 [[CMP_I_I_I]]) // CHECK-GLOBAL-NEXT: [[TMP2:%.*]] = ptrtoint ptr addrspace(1) [[P]] to i64 -// CHECK-GLOBAL-NEXT: [[REM_I_I:%.*]] = and i64 [[TMP2]], 15 -// CHECK-GLOBAL-NEXT: [[CMP1_I_NOT_I:%.*]] = icmp eq i64 [[REM_I_I]], 0 -// CHECK-GLOBAL-NEXT: br i1 [[CMP1_I_NOT_I]], label [[IF_END_I:%.*]], label [[IF_THEN_I:%.*]] -// CHECK-GLOBAL: if.then.i: +// CHECK-GLOBAL-NEXT: [[REM_I_I_I:%.*]] = and i64 [[TMP2]], 15 +// CHECK-GLOBAL-NEXT: [[CMP1_I_NOT_I_I:%.*]] = icmp eq i64 [[REM_I_I_I]], 0 +// CHECK-GLOBAL-NEXT: br i1 [[CMP1_I_NOT_I_I]], label [[IF_END_I_I:%.*]], label [[IF_THEN_I_I:%.*]] +// CHECK-GLOBAL: if.then.i.i: // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // CHECK-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META37:![0-9]+]] -// CHECK-GLOBAL-NEXT: [[MUL_I_I_I:%.*]] = shl i32 [[TMP3]], 2 +// CHECK-GLOBAL-NEXT: [[MUL_I_I_I_I:%.*]] = shl i32 [[TMP3]], 2 +// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] +// CHECK-GLOBAL: for.cond.i.i.i: +// CHECK-GLOBAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[IF_THEN_I_I]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] +// CHECK-GLOBAL-NEXT: [[CMP_I20_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I_I]], 4 +// CHECK-GLOBAL-NEXT: br i1 [[CMP_I20_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL6DETAIL16GROUP_STORE_IMPLINS0_9SUB_GROUPEKSLM4EPU3AS1SNS3_10PROPERTIESINS4_20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSC_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSC_INS3_14FULL_GROUP_KEYEJEEENSC_INS4_9NAIVE_KEYEJEEENSC_INS4_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESU_NS0_4SPANISS_XT1_EEEST_SV__EXIT_I_I:%.*]] +// CHECK-GLOBAL: for.body.i.i.i: +// 
CHECK-GLOBAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 +// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i16, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] +// CHECK-GLOBAL-NEXT: [[TMP4:%.*]] = load i16, ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 2, !tbaa [[TBAA30]] +// CHECK-GLOBAL-NEXT: [[ADD_I_I_I_I:%.*]] = or disjoint i32 [[MUL_I_I_I_I]], [[I_0_I_I_I]] +// CHECK-GLOBAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 +// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[P]], i64 [[IDXPROM_I_I_I]] +// CHECK-GLOBAL-NEXT: store i16 [[TMP4]], ptr addrspace(1) [[ARRAYIDX_I_I_I]], align 2, !tbaa [[TBAA30]] +// CHECK-GLOBAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 +// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP40:![0-9]+]] +// CHECK-GLOBAL: _ZN4sycl3_V13ext6oneapi12experimental6detail16group_store_implINS0_9sub_groupEKsLm4EPU3AS1sNS3_10propertiesINS4_20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSC_INS3_21contiguous_memory_keyEJEEENSC_INS3_14full_group_keyEJEEENSC_INS4_9naive_keyEJEEENSC_INS4_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESU_NS0_4spanISS_XT1_EEEST_SV_.exit.i.i: +// CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] +// CHECK-GLOBAL-NEXT: br label 
[[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEKSLM4EPU3AS1SNS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSC_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSC_INS3_14FULL_GROUP_KEYEJEEENSC_INSA_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESS_NS0_4SPANISQ_XT1_EEESR_ST__EXIT:%.*]] +// CHECK-GLOBAL: if.end.i.i: +// CHECK-GLOBAL-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[VALUES_I_I]]) #[[ATTR9]] // CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I:%.*]] // CHECK-GLOBAL: for.cond.i.i: -// CHECK-GLOBAL-NEXT: [[I_0_I_I:%.*]] = phi i32 [ 0, [[IF_THEN_I]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] -// CHECK-GLOBAL-NEXT: [[CMP_I19_I:%.*]] = icmp samesign ult i32 [[I_0_I_I]], 4 -// CHECK-GLOBAL-NEXT: br i1 [[CMP_I19_I]], label [[FOR_BODY_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEKSLM4EPU3AS1SNS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSC_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSC_INS3_14FULL_GROUP_KEYEJEEENSC_INSA_9NAIVE_KEYEJEEENSC_INSA_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESU_NS0_4SPANISS_XT1_EEEST_SV__EXIT_I:%.*]] +// CHECK-GLOBAL-NEXT: [[I_0_I_I:%.*]] = phi i32 [ 0, [[IF_END_I_I]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] +// CHECK-GLOBAL-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I]], 4 +// CHECK-GLOBAL-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[FOR_COND_CLEANUP_I_I:%.*]] +// CHECK-GLOBAL: for.cond.cleanup.i.i: +// CHECK-GLOBAL-NEXT: [[TMP5:%.*]] = load i64, ptr [[VALUES_I_I]], align 2, !tbaa [[TBAA34]] +// CHECK-GLOBAL-NEXT: tail call spir_func void 
@_Z31__spirv_SubgroupBlockWriteINTELImEvPU3AS1mT_(ptr addrspace(1) noundef nonnull [[P]], i64 noundef [[TMP5]]) #[[ATTR7]] +// CHECK-GLOBAL-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VALUES_I_I]]) #[[ATTR9]] +// CHECK-GLOBAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEKSLM4EPU3AS1SNS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSC_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSC_INS3_14FULL_GROUP_KEYEJEEENSC_INSA_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESS_NS0_4SPANISQ_XT1_EEESR_ST__EXIT]] // CHECK-GLOBAL: for.body.i.i: // CHECK-GLOBAL-NEXT: [[CONV_I_I:%.*]] = zext nneg i32 [[I_0_I_I]] to i64 -// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds nuw i16, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I]] -// CHECK-GLOBAL-NEXT: [[TMP4:%.*]] = load i16, ptr addrspace(4) [[ARRAYIDX_I_I_I]], align 2, !tbaa [[TBAA30]] -// CHECK-GLOBAL-NEXT: [[ADD_I_I_I:%.*]] = or disjoint i32 [[MUL_I_I_I]], [[I_0_I_I]] -// CHECK-GLOBAL-NEXT: [[IDXPROM_I_I:%.*]] = sext i32 [[ADD_I_I_I]] to i64 -// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[P]], i64 [[IDXPROM_I_I]] -// CHECK-GLOBAL-NEXT: store i16 [[TMP4]], ptr addrspace(1) [[ARRAYIDX_I_I]], align 2, !tbaa [[TBAA30]] +// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I21_I_I:%.*]] = getelementptr inbounds nuw i16, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I]] +// CHECK-GLOBAL-NEXT: [[TMP6:%.*]] = load i16, ptr addrspace(4) [[ARRAYIDX_I21_I_I]], align 2, !tbaa [[TBAA30]] +// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds [4 x i16], ptr [[VALUES_I_I]], i64 0, i64 [[CONV_I_I]] +// CHECK-GLOBAL-NEXT: store i16 [[TMP6]], ptr [[ARRAYIDX_I_I]], align 2, !tbaa [[TBAA30]] // CHECK-GLOBAL-NEXT: [[INC_I_I]] = add nuw nsw i32 [[I_0_I_I]], 1 -// 
CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP40:![0-9]+]] -// CHECK-GLOBAL: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEKsLm4EPU3AS1sNS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSC_INS3_21contiguous_memory_keyEJEEENSC_INS3_14full_group_keyEJEEENSC_INSA_9naive_keyEJEEENSC_INSA_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESU_NS0_4spanISS_XT1_EEEST_SV_.exit.i: -// CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] -// CHECK-GLOBAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEKSLM4EPU3AS1SNS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSC_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSC_INS3_14FULL_GROUP_KEYEJEEENSC_INSA_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESS_NS0_4SPANISQ_XT1_EEESR_ST__EXIT:%.*]] -// CHECK-GLOBAL: if.end.i: -// CHECK-GLOBAL-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[VALUES_I]]) #[[ATTR9]] -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I:%.*]] -// CHECK-GLOBAL: for.cond.i: -// CHECK-GLOBAL-NEXT: [[I_0_I:%.*]] = phi i32 [ 0, [[IF_END_I]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] -// CHECK-GLOBAL-NEXT: [[CMP_I:%.*]] = icmp samesign ult i32 [[I_0_I]], 4 -// CHECK-GLOBAL-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[FOR_COND_CLEANUP_I:%.*]] -// CHECK-GLOBAL: for.cond.cleanup.i: -// CHECK-GLOBAL-NEXT: [[TMP5:%.*]] = load i64, ptr [[VALUES_I]], align 2, !tbaa [[TBAA34]] -// CHECK-GLOBAL-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELImEvPU3AS1mT_(ptr addrspace(1) 
noundef nonnull [[P]], i64 noundef [[TMP5]]) #[[ATTR7]] -// CHECK-GLOBAL-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VALUES_I]]) #[[ATTR9]] -// CHECK-GLOBAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEKSLM4EPU3AS1SNS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSC_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSC_INS3_14FULL_GROUP_KEYEJEEENSC_INSA_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESS_NS0_4SPANISQ_XT1_EEESR_ST__EXIT]] -// CHECK-GLOBAL: for.body.i: -// CHECK-GLOBAL-NEXT: [[CONV_I:%.*]] = zext nneg i32 [[I_0_I]] to i64 -// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I20_I:%.*]] = getelementptr inbounds nuw i16, ptr addrspace(4) [[TMP1]], i64 [[CONV_I]] -// CHECK-GLOBAL-NEXT: [[TMP6:%.*]] = load i16, ptr addrspace(4) [[ARRAYIDX_I20_I]], align 2, !tbaa [[TBAA30]] -// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x i16], ptr [[VALUES_I]], i64 0, i64 [[CONV_I]] -// CHECK-GLOBAL-NEXT: store i16 [[TMP6]], ptr [[ARRAYIDX_I]], align 2, !tbaa [[TBAA30]] -// CHECK-GLOBAL-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1 -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP41:![0-9]+]] +// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP41:![0-9]+]] // CHECK-GLOBAL: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEKsLm4EPU3AS1sNS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSC_INS3_21contiguous_memory_keyEJEEENSC_INS3_14full_group_keyEJEEENSC_INSA_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESS_NS0_4spanISQ_XT1_EEESR_ST_.exit: // CHECK-GLOBAL-NEXT: ret void // // CHECK-LOCAL-LABEL: 
@_ZN7blocked22test_four_const_shortsERN4sycl3_V19sub_groupENS1_4spanIKsLm4EEEPU3AS3s( // CHECK-LOCAL-NEXT: entry: -// CHECK-LOCAL-NEXT: [[VALUES_I:%.*]] = alloca [4 x i16], align 2 +// CHECK-LOCAL-NEXT: [[VALUES_I_I:%.*]] = alloca [4 x i16], align 2 // CHECK-LOCAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[V:%.*]], align 8, !tbaa [[TBAA25]] // CHECK-LOCAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) -// CHECK-LOCAL-NEXT: [[CMP_I_I:%.*]] = icmp ne ptr addrspace(3) [[P:%.*]], null -// CHECK-LOCAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I]]) +// CHECK-LOCAL-NEXT: [[CMP_I_I_I:%.*]] = icmp ne ptr addrspace(3) [[P:%.*]], null +// CHECK-LOCAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I_I]]) // CHECK-LOCAL-NEXT: [[TMP2:%.*]] = ptrtoint ptr addrspace(3) [[P]] to i64 -// CHECK-LOCAL-NEXT: [[REM_I_I:%.*]] = and i64 [[TMP2]], 15 -// CHECK-LOCAL-NEXT: [[CMP1_I_NOT_I:%.*]] = icmp eq i64 [[REM_I_I]], 0 -// CHECK-LOCAL-NEXT: br i1 [[CMP1_I_NOT_I]], label [[IF_END_I:%.*]], label [[IF_THEN_I:%.*]] -// CHECK-LOCAL: if.then.i: +// CHECK-LOCAL-NEXT: [[REM_I_I_I:%.*]] = and i64 [[TMP2]], 15 +// CHECK-LOCAL-NEXT: [[CMP1_I_NOT_I_I:%.*]] = icmp eq i64 [[REM_I_I_I]], 0 +// CHECK-LOCAL-NEXT: br i1 [[CMP1_I_NOT_I_I]], label [[IF_END_I_I:%.*]], label [[IF_THEN_I_I:%.*]] +// CHECK-LOCAL: if.then.i.i: // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // CHECK-LOCAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META37:![0-9]+]] -// CHECK-LOCAL-NEXT: [[MUL_I_I_I:%.*]] = shl i32 [[TMP3]], 2 +// CHECK-LOCAL-NEXT: [[MUL_I_I_I_I:%.*]] = shl i32 [[TMP3]], 2 +// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] +// CHECK-LOCAL: for.cond.i.i.i: +// CHECK-LOCAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[IF_THEN_I_I]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] +// CHECK-LOCAL-NEXT: [[CMP_I20_I_I:%.*]] = icmp samesign 
ult i32 [[I_0_I_I_I]], 4 +// CHECK-LOCAL-NEXT: br i1 [[CMP_I20_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL6DETAIL16GROUP_STORE_IMPLINS0_9SUB_GROUPEKSLM4EPU3AS3SNS3_10PROPERTIESINS4_20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSC_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSC_INS3_14FULL_GROUP_KEYEJEEENSC_INS4_9NAIVE_KEYEJEEENSC_INS4_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESU_NS0_4SPANISS_XT1_EEEST_SV__EXIT_I_I:%.*]] +// CHECK-LOCAL: for.body.i.i.i: +// CHECK-LOCAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 +// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i16, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] +// CHECK-LOCAL-NEXT: [[TMP4:%.*]] = load i16, ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 2, !tbaa [[TBAA30]] +// CHECK-LOCAL-NEXT: [[ADD_I_I_I_I:%.*]] = or disjoint i32 [[MUL_I_I_I_I]], [[I_0_I_I_I]] +// CHECK-LOCAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 +// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i16, ptr addrspace(3) [[P]], i64 [[IDXPROM_I_I_I]] +// CHECK-LOCAL-NEXT: store i16 [[TMP4]], ptr addrspace(3) [[ARRAYIDX_I_I_I]], align 2, !tbaa [[TBAA30]] +// CHECK-LOCAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 +// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP40:![0-9]+]] +// CHECK-LOCAL: 
_ZN4sycl3_V13ext6oneapi12experimental6detail16group_store_implINS0_9sub_groupEKsLm4EPU3AS3sNS3_10propertiesINS4_20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSC_INS3_21contiguous_memory_keyEJEEENSC_INS3_14full_group_keyEJEEENSC_INS4_9naive_keyEJEEENSC_INS4_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESU_NS0_4spanISS_XT1_EEEST_SV_.exit.i.i: +// CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] +// CHECK-LOCAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEKSLM4EPU3AS3SNS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSC_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSC_INS3_14FULL_GROUP_KEYEJEEENSC_INSA_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESS_NS0_4SPANISQ_XT1_EEESR_ST__EXIT:%.*]] +// CHECK-LOCAL: if.end.i.i: +// CHECK-LOCAL-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[VALUES_I_I]]) #[[ATTR9]] // CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I:%.*]] // CHECK-LOCAL: for.cond.i.i: -// CHECK-LOCAL-NEXT: [[I_0_I_I:%.*]] = phi i32 [ 0, [[IF_THEN_I]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] -// CHECK-LOCAL-NEXT: [[CMP_I19_I:%.*]] = icmp samesign ult i32 [[I_0_I_I]], 4 -// CHECK-LOCAL-NEXT: br i1 [[CMP_I19_I]], label [[FOR_BODY_I_I]], label 
[[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEKSLM4EPU3AS3SNS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSC_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSC_INS3_14FULL_GROUP_KEYEJEEENSC_INSA_9NAIVE_KEYEJEEENSC_INSA_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESU_NS0_4SPANISS_XT1_EEEST_SV__EXIT_I:%.*]] +// CHECK-LOCAL-NEXT: [[I_0_I_I:%.*]] = phi i32 [ 0, [[IF_END_I_I]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] +// CHECK-LOCAL-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I]], 4 +// CHECK-LOCAL-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[FOR_COND_CLEANUP_I_I:%.*]] +// CHECK-LOCAL: for.cond.cleanup.i.i: +// CHECK-LOCAL-NEXT: [[TMP5:%.*]] = load i64, ptr [[VALUES_I_I]], align 2, !tbaa [[TBAA34]] +// CHECK-LOCAL-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELImEvPU3AS3mT_(ptr addrspace(3) noundef nonnull [[P]], i64 noundef [[TMP5]]) #[[ATTR7]] +// CHECK-LOCAL-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VALUES_I_I]]) #[[ATTR9]] +// CHECK-LOCAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEKSLM4EPU3AS3SNS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSC_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSC_INS3_14FULL_GROUP_KEYEJEEENSC_INSA_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESS_NS0_4SPANISQ_XT1_EEESR_ST__EXIT]] // CHECK-LOCAL: for.body.i.i: // CHECK-LOCAL-NEXT: [[CONV_I_I:%.*]] = zext nneg i32 [[I_0_I_I]] to i64 -// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds nuw i16, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I]] -// CHECK-LOCAL-NEXT: [[TMP4:%.*]] = load 
i16, ptr addrspace(4) [[ARRAYIDX_I_I_I]], align 2, !tbaa [[TBAA30]] -// CHECK-LOCAL-NEXT: [[ADD_I_I_I:%.*]] = or disjoint i32 [[MUL_I_I_I]], [[I_0_I_I]] -// CHECK-LOCAL-NEXT: [[IDXPROM_I_I:%.*]] = sext i32 [[ADD_I_I_I]] to i64 -// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i16, ptr addrspace(3) [[P]], i64 [[IDXPROM_I_I]] -// CHECK-LOCAL-NEXT: store i16 [[TMP4]], ptr addrspace(3) [[ARRAYIDX_I_I]], align 2, !tbaa [[TBAA30]] +// CHECK-LOCAL-NEXT: [[ARRAYIDX_I21_I_I:%.*]] = getelementptr inbounds nuw i16, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I]] +// CHECK-LOCAL-NEXT: [[TMP6:%.*]] = load i16, ptr addrspace(4) [[ARRAYIDX_I21_I_I]], align 2, !tbaa [[TBAA30]] +// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds [4 x i16], ptr [[VALUES_I_I]], i64 0, i64 [[CONV_I_I]] +// CHECK-LOCAL-NEXT: store i16 [[TMP6]], ptr [[ARRAYIDX_I_I]], align 2, !tbaa [[TBAA30]] // CHECK-LOCAL-NEXT: [[INC_I_I]] = add nuw nsw i32 [[I_0_I_I]], 1 -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP40:![0-9]+]] -// CHECK-LOCAL: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEKsLm4EPU3AS3sNS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSC_INS3_21contiguous_memory_keyEJEEENSC_INS3_14full_group_keyEJEEENSC_INSA_9naive_keyEJEEENSC_INSA_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESU_NS0_4spanISS_XT1_EEEST_SV_.exit.i: -// CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] -// CHECK-LOCAL-NEXT: br label 
[[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEKSLM4EPU3AS3SNS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSC_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSC_INS3_14FULL_GROUP_KEYEJEEENSC_INSA_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESS_NS0_4SPANISQ_XT1_EEESR_ST__EXIT:%.*]] -// CHECK-LOCAL: if.end.i: -// CHECK-LOCAL-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[VALUES_I]]) #[[ATTR9]] -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I:%.*]] -// CHECK-LOCAL: for.cond.i: -// CHECK-LOCAL-NEXT: [[I_0_I:%.*]] = phi i32 [ 0, [[IF_END_I]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] -// CHECK-LOCAL-NEXT: [[CMP_I:%.*]] = icmp samesign ult i32 [[I_0_I]], 4 -// CHECK-LOCAL-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[FOR_COND_CLEANUP_I:%.*]] -// CHECK-LOCAL: for.cond.cleanup.i: -// CHECK-LOCAL-NEXT: [[TMP5:%.*]] = load i64, ptr [[VALUES_I]], align 2, !tbaa [[TBAA34]] -// CHECK-LOCAL-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELImEvPU3AS3mT_(ptr addrspace(3) noundef nonnull [[P]], i64 noundef [[TMP5]]) #[[ATTR7]] -// CHECK-LOCAL-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VALUES_I]]) #[[ATTR9]] -// CHECK-LOCAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEKSLM4EPU3AS3SNS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSC_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSC_INS3_14FULL_GROUP_KEYEJEEENSC_INSA_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESS_NS0_4SPANISQ_XT1_EEESR_ST__EXIT]] -// CHECK-LOCAL: for.body.i: -// CHECK-LOCAL-NEXT: [[CONV_I:%.*]] = zext nneg i32 [[I_0_I]] to i64 -// 
CHECK-LOCAL-NEXT: [[ARRAYIDX_I20_I:%.*]] = getelementptr inbounds nuw i16, ptr addrspace(4) [[TMP1]], i64 [[CONV_I]] -// CHECK-LOCAL-NEXT: [[TMP6:%.*]] = load i16, ptr addrspace(4) [[ARRAYIDX_I20_I]], align 2, !tbaa [[TBAA30]] -// CHECK-LOCAL-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x i16], ptr [[VALUES_I]], i64 0, i64 [[CONV_I]] -// CHECK-LOCAL-NEXT: store i16 [[TMP6]], ptr [[ARRAYIDX_I]], align 2, !tbaa [[TBAA30]] -// CHECK-LOCAL-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1 -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP41:![0-9]+]] +// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP41:![0-9]+]] // CHECK-LOCAL: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEKsLm4EPU3AS3sNS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSC_INS3_21contiguous_memory_keyEJEEENSC_INS3_14full_group_keyEJEEENSC_INSA_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESS_NS0_4spanISQ_XT1_EEESR_ST_.exit: // CHECK-LOCAL-NEXT: ret void // @@ -508,22 +508,22 @@ SYCL_EXTERNAL void test_four_const_shorts(sycl::sub_group &sg, // CHECK-GLOBAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // CHECK-GLOBAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META43:![0-9]+]] -// CHECK-GLOBAL-NEXT: [[MUL_I_I_I:%.*]] = mul i32 [[TMP2]], 3 -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I:%.*]] -// CHECK-GLOBAL: for.cond.i.i: -// CHECK-GLOBAL-NEXT: [[I_0_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] -// CHECK-GLOBAL-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I]], 3 -// 
CHECK-GLOBAL-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM3EPU3AS1INS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_SS__EXIT:%.*]] -// CHECK-GLOBAL: for.body.i.i: -// CHECK-GLOBAL-NEXT: [[CONV_I_I:%.*]] = zext nneg i32 [[I_0_I_I]] to i64 -// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I]] -// CHECK-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-GLOBAL-NEXT: [[ADD_I_I_I:%.*]] = add i32 [[MUL_I_I_I]], [[I_0_I_I]] -// CHECK-GLOBAL-NEXT: [[IDXPROM_I_I:%.*]] = sext i32 [[ADD_I_I_I]] to i64 -// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[P:%.*]], i64 [[IDXPROM_I_I]] -// CHECK-GLOBAL-NEXT: store i32 [[TMP3]], ptr addrspace(1) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-GLOBAL-NEXT: [[INC_I_I]] = add nuw nsw i32 [[I_0_I_I]], 1 -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP46:![0-9]+]] +// CHECK-GLOBAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul i32 [[TMP2]], 3 +// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] +// CHECK-GLOBAL: for.cond.i.i.i: +// CHECK-GLOBAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] +// CHECK-GLOBAL-NEXT: [[CMP_I_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I_I]], 3 +// CHECK-GLOBAL-NEXT: br i1 [[CMP_I_I_I]], label [[FOR_BODY_I_I_I]], label 
[[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM3EPU3AS1INS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_SS__EXIT:%.*]] +// CHECK-GLOBAL: for.body.i.i.i: +// CHECK-GLOBAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 +// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] +// CHECK-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-GLOBAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[MUL_I_I_I_I]], [[I_0_I_I_I]] +// CHECK-GLOBAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 +// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[P:%.*]], i64 [[IDXPROM_I_I_I]] +// CHECK-GLOBAL-NEXT: store i32 [[TMP3]], ptr addrspace(1) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-GLOBAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 +// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK-GLOBAL: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm3EPU3AS1iNS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_NS0_4spanISP_XT1_EEESQ_SS_.exit: // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 
3, i32 noundef 912) #[[ATTR7]] // CHECK-GLOBAL-NEXT: ret void @@ -534,22 +534,22 @@ SYCL_EXTERNAL void test_four_const_shorts(sycl::sub_group &sg, // CHECK-LOCAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // CHECK-LOCAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META43:![0-9]+]] -// CHECK-LOCAL-NEXT: [[MUL_I_I_I:%.*]] = mul i32 [[TMP2]], 3 -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I:%.*]] -// CHECK-LOCAL: for.cond.i.i: -// CHECK-LOCAL-NEXT: [[I_0_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] -// CHECK-LOCAL-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I]], 3 -// CHECK-LOCAL-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM3EPU3AS3INS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_SS__EXIT:%.*]] -// CHECK-LOCAL: for.body.i.i: -// CHECK-LOCAL-NEXT: [[CONV_I_I:%.*]] = zext nneg i32 [[I_0_I_I]] to i64 -// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I]] -// CHECK-LOCAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-LOCAL-NEXT: [[ADD_I_I_I:%.*]] = add i32 [[MUL_I_I_I]], [[I_0_I_I]] -// CHECK-LOCAL-NEXT: [[IDXPROM_I_I:%.*]] = sext i32 [[ADD_I_I_I]] to i64 -// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(3) 
[[P:%.*]], i64 [[IDXPROM_I_I]] -// CHECK-LOCAL-NEXT: store i32 [[TMP3]], ptr addrspace(3) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-LOCAL-NEXT: [[INC_I_I]] = add nuw nsw i32 [[I_0_I_I]], 1 -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP46:![0-9]+]] +// CHECK-LOCAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul i32 [[TMP2]], 3 +// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] +// CHECK-LOCAL: for.cond.i.i.i: +// CHECK-LOCAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] +// CHECK-LOCAL-NEXT: [[CMP_I_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I_I]], 3 +// CHECK-LOCAL-NEXT: br i1 [[CMP_I_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM3EPU3AS3INS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_SS__EXIT:%.*]] +// CHECK-LOCAL: for.body.i.i.i: +// CHECK-LOCAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 +// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] +// CHECK-LOCAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-LOCAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[MUL_I_I_I_I]], [[I_0_I_I_I]] +// CHECK-LOCAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 +// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[P:%.*]], i64 [[IDXPROM_I_I_I]] +// CHECK-LOCAL-NEXT: store i32 [[TMP3]], ptr addrspace(3) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-LOCAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 
[[I_0_I_I_I]], 1 +// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK-LOCAL: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm3EPU3AS3iNS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_NS0_4spanISP_XT1_EEESQ_SS_.exit: // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // CHECK-LOCAL-NEXT: ret void @@ -566,22 +566,22 @@ SYCL_EXTERNAL void test_non_power_of_two(sycl::sub_group &sg, span v, // CHECK-GLOBAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // CHECK-GLOBAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META48:![0-9]+]] -// CHECK-GLOBAL-NEXT: [[MUL_I_I_I:%.*]] = shl i32 [[TMP2]], 2 -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I:%.*]] -// CHECK-GLOBAL: for.cond.i.i: -// CHECK-GLOBAL-NEXT: [[I_0_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] -// CHECK-GLOBAL-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I]], 4 -// CHECK-GLOBAL-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label 
[[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM4EPU3AS1INS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_SS__EXIT:%.*]] -// CHECK-GLOBAL: for.body.i.i: -// CHECK-GLOBAL-NEXT: [[CONV_I_I:%.*]] = zext nneg i32 [[I_0_I_I]] to i64 -// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I]] -// CHECK-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-GLOBAL-NEXT: [[ADD_I_I_I:%.*]] = or disjoint i32 [[MUL_I_I_I]], [[I_0_I_I]] -// CHECK-GLOBAL-NEXT: [[IDXPROM_I_I:%.*]] = sext i32 [[ADD_I_I_I]] to i64 -// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[P:%.*]], i64 [[IDXPROM_I_I]] -// CHECK-GLOBAL-NEXT: store i32 [[TMP3]], ptr addrspace(1) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-GLOBAL-NEXT: [[INC_I_I]] = add nuw nsw i32 [[I_0_I_I]], 1 -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP51:![0-9]+]] +// CHECK-GLOBAL-NEXT: [[MUL_I_I_I_I:%.*]] = shl i32 [[TMP2]], 2 +// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] +// CHECK-GLOBAL: for.cond.i.i.i: +// CHECK-GLOBAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] +// CHECK-GLOBAL-NEXT: [[CMP_I_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I_I]], 4 +// CHECK-GLOBAL-NEXT: br i1 [[CMP_I_I_I]], label [[FOR_BODY_I_I_I]], label 
[[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM4EPU3AS1INS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_SS__EXIT:%.*]] +// CHECK-GLOBAL: for.body.i.i.i: +// CHECK-GLOBAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 +// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] +// CHECK-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-GLOBAL-NEXT: [[ADD_I_I_I_I:%.*]] = or disjoint i32 [[MUL_I_I_I_I]], [[I_0_I_I_I]] +// CHECK-GLOBAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 +// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[P:%.*]], i64 [[IDXPROM_I_I_I]] +// CHECK-GLOBAL-NEXT: store i32 [[TMP3]], ptr addrspace(1) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-GLOBAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 +// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK-GLOBAL: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm4EPU3AS1iNS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_NS0_4spanISP_XT1_EEESQ_SS_.exit: // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 
noundef 3, i32 noundef 912) #[[ATTR7]] // CHECK-GLOBAL-NEXT: ret void @@ -592,22 +592,22 @@ SYCL_EXTERNAL void test_non_power_of_two(sycl::sub_group &sg, span v, // CHECK-LOCAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // CHECK-LOCAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META48:![0-9]+]] -// CHECK-LOCAL-NEXT: [[MUL_I_I_I:%.*]] = shl i32 [[TMP2]], 2 -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I:%.*]] -// CHECK-LOCAL: for.cond.i.i: -// CHECK-LOCAL-NEXT: [[I_0_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] -// CHECK-LOCAL-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I]], 4 -// CHECK-LOCAL-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM4EPU3AS3INS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_SS__EXIT:%.*]] -// CHECK-LOCAL: for.body.i.i: -// CHECK-LOCAL-NEXT: [[CONV_I_I:%.*]] = zext nneg i32 [[I_0_I_I]] to i64 -// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I]] -// CHECK-LOCAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-LOCAL-NEXT: [[ADD_I_I_I:%.*]] = or disjoint i32 [[MUL_I_I_I]], [[I_0_I_I]] -// CHECK-LOCAL-NEXT: [[IDXPROM_I_I:%.*]] = sext i32 [[ADD_I_I_I]] to i64 -// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i32, 
ptr addrspace(3) [[P:%.*]], i64 [[IDXPROM_I_I]] -// CHECK-LOCAL-NEXT: store i32 [[TMP3]], ptr addrspace(3) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-LOCAL-NEXT: [[INC_I_I]] = add nuw nsw i32 [[I_0_I_I]], 1 -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP51:![0-9]+]] +// CHECK-LOCAL-NEXT: [[MUL_I_I_I_I:%.*]] = shl i32 [[TMP2]], 2 +// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] +// CHECK-LOCAL: for.cond.i.i.i: +// CHECK-LOCAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] +// CHECK-LOCAL-NEXT: [[CMP_I_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I_I]], 4 +// CHECK-LOCAL-NEXT: br i1 [[CMP_I_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM4EPU3AS3INS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_SS__EXIT:%.*]] +// CHECK-LOCAL: for.body.i.i.i: +// CHECK-LOCAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 +// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] +// CHECK-LOCAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-LOCAL-NEXT: [[ADD_I_I_I_I:%.*]] = or disjoint i32 [[MUL_I_I_I_I]], [[I_0_I_I_I]] +// CHECK-LOCAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 +// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[P:%.*]], i64 [[IDXPROM_I_I_I]] +// CHECK-LOCAL-NEXT: store i32 [[TMP3]], ptr addrspace(3) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-LOCAL-NEXT: [[INC_I_I_I]] = 
add nuw nsw i32 [[I_0_I_I_I]], 1 +// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK-LOCAL: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm4EPU3AS3iNS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_NS0_4spanISP_XT1_EEESQ_SS_.exit: // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // CHECK-LOCAL-NEXT: ret void @@ -625,22 +625,22 @@ SYCL_EXTERNAL void test_four_ints(sycl::sub_group &sg, span v, // CHECK-GLOBAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // CHECK-GLOBAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META53:![0-9]+]] -// CHECK-GLOBAL-NEXT: [[MUL_I_I_I:%.*]] = mul i32 [[TMP2]], 7 -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I:%.*]] -// CHECK-GLOBAL: for.cond.i.i: -// CHECK-GLOBAL-NEXT: [[I_0_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] -// CHECK-GLOBAL-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I]], 7 -// CHECK-GLOBAL-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label 
[[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM7EPU3AS1INS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_SS__EXIT:%.*]] -// CHECK-GLOBAL: for.body.i.i: -// CHECK-GLOBAL-NEXT: [[CONV_I_I:%.*]] = zext nneg i32 [[I_0_I_I]] to i64 -// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I]] -// CHECK-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-GLOBAL-NEXT: [[ADD_I_I_I:%.*]] = add i32 [[MUL_I_I_I]], [[I_0_I_I]] -// CHECK-GLOBAL-NEXT: [[IDXPROM_I_I:%.*]] = sext i32 [[ADD_I_I_I]] to i64 -// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[P:%.*]], i64 [[IDXPROM_I_I]] -// CHECK-GLOBAL-NEXT: store i32 [[TMP3]], ptr addrspace(1) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-GLOBAL-NEXT: [[INC_I_I]] = add nuw nsw i32 [[I_0_I_I]], 1 -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP56:![0-9]+]] +// CHECK-GLOBAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul i32 [[TMP2]], 7 +// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] +// CHECK-GLOBAL: for.cond.i.i.i: +// CHECK-GLOBAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] +// CHECK-GLOBAL-NEXT: [[CMP_I_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I_I]], 7 +// CHECK-GLOBAL-NEXT: br i1 [[CMP_I_I_I]], label [[FOR_BODY_I_I_I]], label 
[[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM7EPU3AS1INS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_SS__EXIT:%.*]] +// CHECK-GLOBAL: for.body.i.i.i: +// CHECK-GLOBAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 +// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] +// CHECK-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-GLOBAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[MUL_I_I_I_I]], [[I_0_I_I_I]] +// CHECK-GLOBAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 +// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[P:%.*]], i64 [[IDXPROM_I_I_I]] +// CHECK-GLOBAL-NEXT: store i32 [[TMP3]], ptr addrspace(1) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-GLOBAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 +// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP56:![0-9]+]] // CHECK-GLOBAL: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm7EPU3AS1iNS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_NS0_4spanISP_XT1_EEESQ_SS_.exit: // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 
3, i32 noundef 912) #[[ATTR7]] // CHECK-GLOBAL-NEXT: ret void @@ -651,22 +651,22 @@ SYCL_EXTERNAL void test_four_ints(sycl::sub_group &sg, span v, // CHECK-LOCAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // CHECK-LOCAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META53:![0-9]+]] -// CHECK-LOCAL-NEXT: [[MUL_I_I_I:%.*]] = mul i32 [[TMP2]], 7 -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I:%.*]] -// CHECK-LOCAL: for.cond.i.i: -// CHECK-LOCAL-NEXT: [[I_0_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] -// CHECK-LOCAL-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I]], 7 -// CHECK-LOCAL-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM7EPU3AS3INS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_SS__EXIT:%.*]] -// CHECK-LOCAL: for.body.i.i: -// CHECK-LOCAL-NEXT: [[CONV_I_I:%.*]] = zext nneg i32 [[I_0_I_I]] to i64 -// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I]] -// CHECK-LOCAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-LOCAL-NEXT: [[ADD_I_I_I:%.*]] = add i32 [[MUL_I_I_I]], [[I_0_I_I]] -// CHECK-LOCAL-NEXT: [[IDXPROM_I_I:%.*]] = sext i32 [[ADD_I_I_I]] to i64 -// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(3) 
[[P:%.*]], i64 [[IDXPROM_I_I]] -// CHECK-LOCAL-NEXT: store i32 [[TMP3]], ptr addrspace(3) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-LOCAL-NEXT: [[INC_I_I]] = add nuw nsw i32 [[I_0_I_I]], 1 -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP56:![0-9]+]] +// CHECK-LOCAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul i32 [[TMP2]], 7 +// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] +// CHECK-LOCAL: for.cond.i.i.i: +// CHECK-LOCAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] +// CHECK-LOCAL-NEXT: [[CMP_I_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I_I]], 7 +// CHECK-LOCAL-NEXT: br i1 [[CMP_I_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM7EPU3AS3INS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_SS__EXIT:%.*]] +// CHECK-LOCAL: for.body.i.i.i: +// CHECK-LOCAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 +// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] +// CHECK-LOCAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-LOCAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[MUL_I_I_I_I]], [[I_0_I_I_I]] +// CHECK-LOCAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 +// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[P:%.*]], i64 [[IDXPROM_I_I_I]] +// CHECK-LOCAL-NEXT: store i32 [[TMP3]], ptr addrspace(3) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-LOCAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 
[[I_0_I_I_I]], 1 +// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP56:![0-9]+]] // CHECK-LOCAL: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm7EPU3AS3iNS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_NS0_4spanISP_XT1_EEESQ_SS_.exit: // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // CHECK-LOCAL-NEXT: ret void @@ -686,22 +686,22 @@ namespace striped { // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // CHECK-GLOBAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META58:![0-9]+]] // CHECK-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META61:![0-9]+]] -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I:%.*]] -// CHECK-GLOBAL: for.cond.i: -// CHECK-GLOBAL-NEXT: [[I_0_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] -// CHECK-GLOBAL-NEXT: [[CMP_I:%.*]] = icmp samesign ult i32 [[I_0_I]], 2 -// CHECK-GLOBAL-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label 
[[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM2EPU3AS1INS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS9_9NAIVE_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESP_NS0_4SPANISN_XT1_EEESO_SQ__EXIT:%.*]] -// CHECK-GLOBAL: for.body.i: -// CHECK-GLOBAL-NEXT: [[CONV_I:%.*]] = zext nneg i32 [[I_0_I]] to i64 -// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I]] -// CHECK-GLOBAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-GLOBAL-NEXT: [[MUL_I_I:%.*]] = mul nuw nsw i32 [[TMP3]], [[I_0_I]] -// CHECK-GLOBAL-NEXT: [[ADD_I_I:%.*]] = add i32 [[TMP2]], [[MUL_I_I]] -// CHECK-GLOBAL-NEXT: [[IDXPROM_I:%.*]] = sext i32 [[ADD_I_I]] to i64 -// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[P:%.*]], i64 [[IDXPROM_I]] -// CHECK-GLOBAL-NEXT: store i32 [[TMP4]], ptr addrspace(1) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA8]] -// CHECK-GLOBAL-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1 -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP64:![0-9]+]] +// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I:%.*]] +// CHECK-GLOBAL: for.cond.i.i: +// CHECK-GLOBAL-NEXT: [[I_0_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] +// CHECK-GLOBAL-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I]], 2 +// CHECK-GLOBAL-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label 
[[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM2EPU3AS1INS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS9_9NAIVE_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESP_NS0_4SPANISN_XT1_EEESO_SQ__EXIT:%.*]] +// CHECK-GLOBAL: for.body.i.i: +// CHECK-GLOBAL-NEXT: [[CONV_I_I:%.*]] = zext nneg i32 [[I_0_I_I]] to i64 +// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I]] +// CHECK-GLOBAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-GLOBAL-NEXT: [[MUL_I_I_I:%.*]] = mul nuw nsw i32 [[TMP3]], [[I_0_I_I]] +// CHECK-GLOBAL-NEXT: [[ADD_I_I_I:%.*]] = add i32 [[TMP2]], [[MUL_I_I_I]] +// CHECK-GLOBAL-NEXT: [[IDXPROM_I_I:%.*]] = sext i32 [[ADD_I_I_I]] to i64 +// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[P:%.*]], i64 [[IDXPROM_I_I]] +// CHECK-GLOBAL-NEXT: store i32 [[TMP4]], ptr addrspace(1) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-GLOBAL-NEXT: [[INC_I_I]] = add nuw nsw i32 [[I_0_I_I]], 1 +// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP64:![0-9]+]] // CHECK-GLOBAL: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm2EPU3AS1iNS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS9_9naive_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESP_NS0_4spanISN_XT1_EEESO_SQ_.exit: // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // 
CHECK-GLOBAL-NEXT: ret void @@ -713,22 +713,22 @@ namespace striped { // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // CHECK-LOCAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META58:![0-9]+]] // CHECK-LOCAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META61:![0-9]+]] -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I:%.*]] -// CHECK-LOCAL: for.cond.i: -// CHECK-LOCAL-NEXT: [[I_0_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] -// CHECK-LOCAL-NEXT: [[CMP_I:%.*]] = icmp samesign ult i32 [[I_0_I]], 2 -// CHECK-LOCAL-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM2EPU3AS3INS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS9_9NAIVE_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESP_NS0_4SPANISN_XT1_EEESO_SQ__EXIT:%.*]] -// CHECK-LOCAL: for.body.i: -// CHECK-LOCAL-NEXT: [[CONV_I:%.*]] = zext nneg i32 [[I_0_I]] to i64 -// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I]] -// CHECK-LOCAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-LOCAL-NEXT: [[MUL_I_I:%.*]] = mul nuw nsw i32 [[TMP3]], [[I_0_I]] -// CHECK-LOCAL-NEXT: [[ADD_I_I:%.*]] = add i32 [[TMP2]], [[MUL_I_I]] -// CHECK-LOCAL-NEXT: [[IDXPROM_I:%.*]] = sext i32 [[ADD_I_I]] to i64 -// CHECK-LOCAL-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[P:%.*]], i64 [[IDXPROM_I]] -// CHECK-LOCAL-NEXT: store i32 [[TMP4]], ptr 
addrspace(3) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA8]] -// CHECK-LOCAL-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1 -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP64:![0-9]+]] +// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I:%.*]] +// CHECK-LOCAL: for.cond.i.i: +// CHECK-LOCAL-NEXT: [[I_0_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] +// CHECK-LOCAL-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I]], 2 +// CHECK-LOCAL-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM2EPU3AS3INS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS9_9NAIVE_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESP_NS0_4SPANISN_XT1_EEESO_SQ__EXIT:%.*]] +// CHECK-LOCAL: for.body.i.i: +// CHECK-LOCAL-NEXT: [[CONV_I_I:%.*]] = zext nneg i32 [[I_0_I_I]] to i64 +// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I]] +// CHECK-LOCAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-LOCAL-NEXT: [[MUL_I_I_I:%.*]] = mul nuw nsw i32 [[TMP3]], [[I_0_I_I]] +// CHECK-LOCAL-NEXT: [[ADD_I_I_I:%.*]] = add i32 [[TMP2]], [[MUL_I_I_I]] +// CHECK-LOCAL-NEXT: [[IDXPROM_I_I:%.*]] = sext i32 [[ADD_I_I_I]] to i64 +// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[P:%.*]], i64 [[IDXPROM_I_I]] +// CHECK-LOCAL-NEXT: store i32 [[TMP4]], ptr addrspace(3) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-LOCAL-NEXT: [[INC_I_I]] = add nuw nsw i32 [[I_0_I_I]], 1 +// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP64:![0-9]+]] // CHECK-LOCAL: 
_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm2EPU3AS3iNS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS9_9naive_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESP_NS0_4spanISN_XT1_EEESO_SQ_.exit: // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // CHECK-LOCAL-NEXT: ret void @@ -741,115 +741,115 @@ SYCL_EXTERNAL void test_naive(sycl::sub_group &sg, span v, // CHECK-GLOBAL-LABEL: @_ZN7striped14test_optimizedERN4sycl3_V19sub_groupENS1_4spanIiLm2EEEPU3AS1i( // CHECK-GLOBAL-NEXT: entry: -// CHECK-GLOBAL-NEXT: [[VALUES_I:%.*]] = alloca [2 x i32], align 4 +// CHECK-GLOBAL-NEXT: [[VALUES_I_I:%.*]] = alloca [2 x i32], align 4 // CHECK-GLOBAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[V:%.*]], align 8, !tbaa [[TBAA15]] // CHECK-GLOBAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) -// CHECK-GLOBAL-NEXT: [[CMP_I_I:%.*]] = icmp ne ptr addrspace(1) [[P:%.*]], null -// CHECK-GLOBAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I]]) +// CHECK-GLOBAL-NEXT: [[CMP_I_I_I:%.*]] = icmp ne ptr addrspace(1) [[P:%.*]], null +// CHECK-GLOBAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I_I]]) // CHECK-GLOBAL-NEXT: [[TMP2:%.*]] = ptrtoint ptr addrspace(1) [[P]] to i64 -// CHECK-GLOBAL-NEXT: [[REM_I_I:%.*]] = and i64 [[TMP2]], 15 -// CHECK-GLOBAL-NEXT: [[CMP1_I_NOT_I:%.*]] = icmp eq i64 [[REM_I_I]], 0 -// CHECK-GLOBAL-NEXT: br i1 [[CMP1_I_NOT_I]], label [[IF_END_I:%.*]], label [[IF_THEN_I:%.*]] -// CHECK-GLOBAL: if.then.i: +// CHECK-GLOBAL-NEXT: [[REM_I_I_I:%.*]] = and i64 [[TMP2]], 15 +// CHECK-GLOBAL-NEXT: [[CMP1_I_NOT_I_I:%.*]] = icmp eq i64 [[REM_I_I_I]], 0 +// CHECK-GLOBAL-NEXT: br i1 [[CMP1_I_NOT_I_I]], label [[IF_END_I_I:%.*]], label 
[[IF_THEN_I_I:%.*]] +// CHECK-GLOBAL: if.then.i.i: // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // CHECK-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META66:![0-9]+]] // CHECK-GLOBAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META69:![0-9]+]] +// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] +// CHECK-GLOBAL: for.cond.i.i.i: +// CHECK-GLOBAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[IF_THEN_I_I]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] +// CHECK-GLOBAL-NEXT: [[CMP_I20_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I_I]], 2 +// CHECK-GLOBAL-NEXT: br i1 [[CMP_I20_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL6DETAIL16GROUP_STORE_IMPLINS0_9SUB_GROUPEILM2EPU3AS1INS3_10PROPERTIESINS4_20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS4_9NAIVE_KEYEJEEENSB_INS4_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEEST_NS0_4SPANISR_XT1_EEESS_SU__EXIT_I_I:%.*]] +// CHECK-GLOBAL: for.body.i.i.i: +// CHECK-GLOBAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 +// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] +// CHECK-GLOBAL-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-GLOBAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul nuw nsw i32 [[TMP4]], [[I_0_I_I_I]] +// CHECK-GLOBAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[TMP3]], [[MUL_I_I_I_I]] +// CHECK-GLOBAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 +// 
CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[P]], i64 [[IDXPROM_I_I_I]] +// CHECK-GLOBAL-NEXT: store i32 [[TMP5]], ptr addrspace(1) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-GLOBAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 +// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP72:![0-9]+]] +// CHECK-GLOBAL: _ZN4sycl3_V13ext6oneapi12experimental6detail16group_store_implINS0_9sub_groupEiLm2EPU3AS1iNS3_10propertiesINS4_20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS4_9naive_keyEJEEENSB_INS4_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeEST_NS0_4spanISR_XT1_EEESS_SU_.exit.i.i: +// CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] +// CHECK-GLOBAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM2EPU3AS1INS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_SS__EXIT:%.*]] +// CHECK-GLOBAL: if.end.i.i: +// CHECK-GLOBAL-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[VALUES_I_I]]) #[[ATTR9]] // CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I:%.*]] // CHECK-GLOBAL: for.cond.i.i: -// CHECK-GLOBAL-NEXT: [[I_0_I_I:%.*]] = phi i32 [ 0, [[IF_THEN_I]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] -// CHECK-GLOBAL-NEXT: [[CMP_I19_I:%.*]] = icmp samesign ult i32 [[I_0_I_I]], 2 -// 
CHECK-GLOBAL-NEXT: br i1 [[CMP_I19_I]], label [[FOR_BODY_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM2EPU3AS1INS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_9NAIVE_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEEST_NS0_4SPANISR_XT1_EEESS_SU__EXIT_I:%.*]] +// CHECK-GLOBAL-NEXT: [[I_0_I_I:%.*]] = phi i32 [ 0, [[IF_END_I_I]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] +// CHECK-GLOBAL-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I]], 2 +// CHECK-GLOBAL-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[FOR_COND_CLEANUP_I_I:%.*]] +// CHECK-GLOBAL: for.cond.cleanup.i.i: +// CHECK-GLOBAL-NEXT: [[TMP6:%.*]] = load <2 x i32>, ptr [[VALUES_I_I]], align 4, !tbaa [[TBAA34]] +// CHECK-GLOBAL-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELIDv2_jEvPU3AS1jT_(ptr addrspace(1) noundef nonnull [[P]], <2 x i32> noundef [[TMP6]]) #[[ATTR7]] +// CHECK-GLOBAL-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VALUES_I_I]]) #[[ATTR9]] +// CHECK-GLOBAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM2EPU3AS1INS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_SS__EXIT]] // CHECK-GLOBAL: for.body.i.i: // CHECK-GLOBAL-NEXT: [[CONV_I_I:%.*]] = zext nneg i32 [[I_0_I_I]] to i64 -// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds 
nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I]] -// CHECK-GLOBAL-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-GLOBAL-NEXT: [[MUL_I_I_I:%.*]] = mul nuw nsw i32 [[TMP4]], [[I_0_I_I]] -// CHECK-GLOBAL-NEXT: [[ADD_I_I_I:%.*]] = add i32 [[TMP3]], [[MUL_I_I_I]] -// CHECK-GLOBAL-NEXT: [[IDXPROM_I_I:%.*]] = sext i32 [[ADD_I_I_I]] to i64 -// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[P]], i64 [[IDXPROM_I_I]] -// CHECK-GLOBAL-NEXT: store i32 [[TMP5]], ptr addrspace(1) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I21_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I]] +// CHECK-GLOBAL-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I21_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds [2 x i32], ptr [[VALUES_I_I]], i64 0, i64 [[CONV_I_I]] +// CHECK-GLOBAL-NEXT: store i32 [[TMP7]], ptr [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] // CHECK-GLOBAL-NEXT: [[INC_I_I]] = add nuw nsw i32 [[I_0_I_I]], 1 -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP72:![0-9]+]] -// CHECK-GLOBAL: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm2EPU3AS1iNS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_9naive_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeEST_NS0_4spanISR_XT1_EEESS_SU_.exit.i: -// CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] -// CHECK-GLOBAL-NEXT: br label 
[[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM2EPU3AS1INS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_SS__EXIT:%.*]] -// CHECK-GLOBAL: if.end.i: -// CHECK-GLOBAL-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[VALUES_I]]) #[[ATTR9]] -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I:%.*]] -// CHECK-GLOBAL: for.cond.i: -// CHECK-GLOBAL-NEXT: [[I_0_I:%.*]] = phi i32 [ 0, [[IF_END_I]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] -// CHECK-GLOBAL-NEXT: [[CMP_I:%.*]] = icmp samesign ult i32 [[I_0_I]], 2 -// CHECK-GLOBAL-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[FOR_COND_CLEANUP_I:%.*]] -// CHECK-GLOBAL: for.cond.cleanup.i: -// CHECK-GLOBAL-NEXT: [[TMP6:%.*]] = load <2 x i32>, ptr [[VALUES_I]], align 4, !tbaa [[TBAA34]] -// CHECK-GLOBAL-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELIDv2_jEvPU3AS1jT_(ptr addrspace(1) noundef nonnull [[P]], <2 x i32> noundef [[TMP6]]) #[[ATTR7]] -// CHECK-GLOBAL-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VALUES_I]]) #[[ATTR9]] -// CHECK-GLOBAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM2EPU3AS1INS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_SS__EXIT]] -// CHECK-GLOBAL: for.body.i: -// CHECK-GLOBAL-NEXT: [[CONV_I:%.*]] = zext nneg i32 
[[I_0_I]] to i64 -// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I20_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I]] -// CHECK-GLOBAL-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I20_I]], align 4, !tbaa [[TBAA8]] -// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x i32], ptr [[VALUES_I]], i64 0, i64 [[CONV_I]] -// CHECK-GLOBAL-NEXT: store i32 [[TMP7]], ptr [[ARRAYIDX_I]], align 4, !tbaa [[TBAA8]] -// CHECK-GLOBAL-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1 -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP73:![0-9]+]] +// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP73:![0-9]+]] // CHECK-GLOBAL: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm2EPU3AS1iNS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_NS0_4spanISP_XT1_EEESQ_SS_.exit: // CHECK-GLOBAL-NEXT: ret void // // CHECK-LOCAL-LABEL: @_ZN7striped14test_optimizedERN4sycl3_V19sub_groupENS1_4spanIiLm2EEEPU3AS3i( // CHECK-LOCAL-NEXT: entry: -// CHECK-LOCAL-NEXT: [[VALUES_I:%.*]] = alloca [2 x i32], align 4 +// CHECK-LOCAL-NEXT: [[VALUES_I_I:%.*]] = alloca [2 x i32], align 4 // CHECK-LOCAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[V:%.*]], align 8, !tbaa [[TBAA15]] // CHECK-LOCAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) -// CHECK-LOCAL-NEXT: [[CMP_I_I:%.*]] = icmp ne ptr addrspace(3) [[P:%.*]], null -// CHECK-LOCAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I]]) +// CHECK-LOCAL-NEXT: [[CMP_I_I_I:%.*]] = icmp ne ptr addrspace(3) [[P:%.*]], null +// CHECK-LOCAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I_I]]) // CHECK-LOCAL-NEXT: [[TMP2:%.*]] = ptrtoint ptr addrspace(3) 
[[P]] to i64 -// CHECK-LOCAL-NEXT: [[REM_I_I:%.*]] = and i64 [[TMP2]], 15 -// CHECK-LOCAL-NEXT: [[CMP1_I_NOT_I:%.*]] = icmp eq i64 [[REM_I_I]], 0 -// CHECK-LOCAL-NEXT: br i1 [[CMP1_I_NOT_I]], label [[IF_END_I:%.*]], label [[IF_THEN_I:%.*]] -// CHECK-LOCAL: if.then.i: +// CHECK-LOCAL-NEXT: [[REM_I_I_I:%.*]] = and i64 [[TMP2]], 15 +// CHECK-LOCAL-NEXT: [[CMP1_I_NOT_I_I:%.*]] = icmp eq i64 [[REM_I_I_I]], 0 +// CHECK-LOCAL-NEXT: br i1 [[CMP1_I_NOT_I_I]], label [[IF_END_I_I:%.*]], label [[IF_THEN_I_I:%.*]] +// CHECK-LOCAL: if.then.i.i: // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // CHECK-LOCAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META66:![0-9]+]] // CHECK-LOCAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META69:![0-9]+]] +// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] +// CHECK-LOCAL: for.cond.i.i.i: +// CHECK-LOCAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[IF_THEN_I_I]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] +// CHECK-LOCAL-NEXT: [[CMP_I20_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I_I]], 2 +// CHECK-LOCAL-NEXT: br i1 [[CMP_I20_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL6DETAIL16GROUP_STORE_IMPLINS0_9SUB_GROUPEILM2EPU3AS3INS3_10PROPERTIESINS4_20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS4_9NAIVE_KEYEJEEENSB_INS4_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEEST_NS0_4SPANISR_XT1_EEESS_SU__EXIT_I_I:%.*]] +// CHECK-LOCAL: for.body.i.i.i: +// CHECK-LOCAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 +// 
CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] +// CHECK-LOCAL-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-LOCAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul nuw nsw i32 [[TMP4]], [[I_0_I_I_I]] +// CHECK-LOCAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[TMP3]], [[MUL_I_I_I_I]] +// CHECK-LOCAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 +// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[P]], i64 [[IDXPROM_I_I_I]] +// CHECK-LOCAL-NEXT: store i32 [[TMP5]], ptr addrspace(3) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-LOCAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 +// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP72:![0-9]+]] +// CHECK-LOCAL: _ZN4sycl3_V13ext6oneapi12experimental6detail16group_store_implINS0_9sub_groupEiLm2EPU3AS3iNS3_10propertiesINS4_20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS4_9naive_keyEJEEENSB_INS4_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeEST_NS0_4spanISR_XT1_EEESS_SU_.exit.i.i: +// CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] +// CHECK-LOCAL-NEXT: br label 
[[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM2EPU3AS3INS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_SS__EXIT:%.*]] +// CHECK-LOCAL: if.end.i.i: +// CHECK-LOCAL-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[VALUES_I_I]]) #[[ATTR9]] // CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I:%.*]] // CHECK-LOCAL: for.cond.i.i: -// CHECK-LOCAL-NEXT: [[I_0_I_I:%.*]] = phi i32 [ 0, [[IF_THEN_I]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] -// CHECK-LOCAL-NEXT: [[CMP_I19_I:%.*]] = icmp samesign ult i32 [[I_0_I_I]], 2 -// CHECK-LOCAL-NEXT: br i1 [[CMP_I19_I]], label [[FOR_BODY_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM2EPU3AS3INS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_9NAIVE_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEEST_NS0_4SPANISR_XT1_EEESS_SU__EXIT_I:%.*]] +// CHECK-LOCAL-NEXT: [[I_0_I_I:%.*]] = phi i32 [ 0, [[IF_END_I_I]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] +// CHECK-LOCAL-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I]], 2 +// CHECK-LOCAL-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[FOR_COND_CLEANUP_I_I:%.*]] +// CHECK-LOCAL: for.cond.cleanup.i.i: +// CHECK-LOCAL-NEXT: [[TMP6:%.*]] = load <2 x i32>, ptr [[VALUES_I_I]], align 4, !tbaa [[TBAA34]] +// CHECK-LOCAL-NEXT: tail call spir_func void 
@_Z31__spirv_SubgroupBlockWriteINTELIDv2_jEvPU3AS3jT_(ptr addrspace(3) noundef nonnull [[P]], <2 x i32> noundef [[TMP6]]) #[[ATTR7]] +// CHECK-LOCAL-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VALUES_I_I]]) #[[ATTR9]] +// CHECK-LOCAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM2EPU3AS3INS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_SS__EXIT]] // CHECK-LOCAL: for.body.i.i: // CHECK-LOCAL-NEXT: [[CONV_I_I:%.*]] = zext nneg i32 [[I_0_I_I]] to i64 -// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I]] -// CHECK-LOCAL-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-LOCAL-NEXT: [[MUL_I_I_I:%.*]] = mul nuw nsw i32 [[TMP4]], [[I_0_I_I]] -// CHECK-LOCAL-NEXT: [[ADD_I_I_I:%.*]] = add i32 [[TMP3]], [[MUL_I_I_I]] -// CHECK-LOCAL-NEXT: [[IDXPROM_I_I:%.*]] = sext i32 [[ADD_I_I_I]] to i64 -// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[P]], i64 [[IDXPROM_I_I]] -// CHECK-LOCAL-NEXT: store i32 [[TMP5]], ptr addrspace(3) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-LOCAL-NEXT: [[ARRAYIDX_I21_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I]] +// CHECK-LOCAL-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I21_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds [2 x i32], ptr [[VALUES_I_I]], i64 0, i64 [[CONV_I_I]] +// CHECK-LOCAL-NEXT: store i32 [[TMP7]], ptr [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] // 
CHECK-LOCAL-NEXT: [[INC_I_I]] = add nuw nsw i32 [[I_0_I_I]], 1 -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP72:![0-9]+]] -// CHECK-LOCAL: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm2EPU3AS3iNS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_9naive_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeEST_NS0_4spanISR_XT1_EEESS_SU_.exit.i: -// CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] -// CHECK-LOCAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM2EPU3AS3INS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_SS__EXIT:%.*]] -// CHECK-LOCAL: if.end.i: -// CHECK-LOCAL-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[VALUES_I]]) #[[ATTR9]] -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I:%.*]] -// CHECK-LOCAL: for.cond.i: -// CHECK-LOCAL-NEXT: [[I_0_I:%.*]] = phi i32 [ 0, [[IF_END_I]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] -// CHECK-LOCAL-NEXT: [[CMP_I:%.*]] = icmp samesign ult i32 [[I_0_I]], 2 -// CHECK-LOCAL-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[FOR_COND_CLEANUP_I:%.*]] -// CHECK-LOCAL: for.cond.cleanup.i: -// CHECK-LOCAL-NEXT: [[TMP6:%.*]] = load <2 x i32>, ptr [[VALUES_I]], align 4, !tbaa [[TBAA34]] -// CHECK-LOCAL-NEXT: tail call spir_func void 
@_Z31__spirv_SubgroupBlockWriteINTELIDv2_jEvPU3AS3jT_(ptr addrspace(3) noundef nonnull [[P]], <2 x i32> noundef [[TMP6]]) #[[ATTR7]] -// CHECK-LOCAL-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VALUES_I]]) #[[ATTR9]] -// CHECK-LOCAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM2EPU3AS3INS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_SS__EXIT]] -// CHECK-LOCAL: for.body.i: -// CHECK-LOCAL-NEXT: [[CONV_I:%.*]] = zext nneg i32 [[I_0_I]] to i64 -// CHECK-LOCAL-NEXT: [[ARRAYIDX_I20_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I]] -// CHECK-LOCAL-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I20_I]], align 4, !tbaa [[TBAA8]] -// CHECK-LOCAL-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x i32], ptr [[VALUES_I]], i64 0, i64 [[CONV_I]] -// CHECK-LOCAL-NEXT: store i32 [[TMP7]], ptr [[ARRAYIDX_I]], align 4, !tbaa [[TBAA8]] -// CHECK-LOCAL-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1 -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP73:![0-9]+]] +// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP73:![0-9]+]] // CHECK-LOCAL: 
_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm2EPU3AS3iNS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_NS0_4spanISP_XT1_EEESQ_SS_.exit: // CHECK-LOCAL-NEXT: ret void // @@ -861,115 +861,115 @@ SYCL_EXTERNAL void test_optimized(sycl::sub_group &sg, span v, // CHECK-GLOBAL-LABEL: @_ZN7striped27test_contiguous_auto_detectERN4sycl3_V19sub_groupENS1_4spanIiLm2EEEPU3AS1i( // CHECK-GLOBAL-NEXT: entry: -// CHECK-GLOBAL-NEXT: [[VALUES_I:%.*]] = alloca [2 x i32], align 4 +// CHECK-GLOBAL-NEXT: [[VALUES_I_I:%.*]] = alloca [2 x i32], align 4 // CHECK-GLOBAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[V:%.*]], align 8, !tbaa [[TBAA15]] // CHECK-GLOBAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) -// CHECK-GLOBAL-NEXT: [[CMP_I_I:%.*]] = icmp ne ptr addrspace(1) [[P:%.*]], null -// CHECK-GLOBAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I]]) +// CHECK-GLOBAL-NEXT: [[CMP_I_I_I:%.*]] = icmp ne ptr addrspace(1) [[P:%.*]], null +// CHECK-GLOBAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I_I]]) // CHECK-GLOBAL-NEXT: [[TMP2:%.*]] = ptrtoint ptr addrspace(1) [[P]] to i64 -// CHECK-GLOBAL-NEXT: [[REM_I_I:%.*]] = and i64 [[TMP2]], 15 -// CHECK-GLOBAL-NEXT: [[CMP1_I_NOT_I:%.*]] = icmp eq i64 [[REM_I_I]], 0 -// CHECK-GLOBAL-NEXT: br i1 [[CMP1_I_NOT_I]], label [[IF_END_I:%.*]], label [[IF_THEN_I:%.*]] -// CHECK-GLOBAL: if.then.i: +// CHECK-GLOBAL-NEXT: [[REM_I_I_I:%.*]] = and i64 [[TMP2]], 15 +// CHECK-GLOBAL-NEXT: [[CMP1_I_NOT_I_I:%.*]] = icmp eq i64 [[REM_I_I_I]], 0 +// CHECK-GLOBAL-NEXT: br i1 [[CMP1_I_NOT_I_I]], label [[IF_END_I_I:%.*]], label [[IF_THEN_I_I:%.*]] +// CHECK-GLOBAL: if.then.i.i: // CHECK-GLOBAL-NEXT: tail call 
spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // CHECK-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META75:![0-9]+]] // CHECK-GLOBAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META78:![0-9]+]] +// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] +// CHECK-GLOBAL: for.cond.i.i.i: +// CHECK-GLOBAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[IF_THEN_I_I]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] +// CHECK-GLOBAL-NEXT: [[CMP_I20_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I_I]], 2 +// CHECK-GLOBAL-NEXT: br i1 [[CMP_I20_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL6DETAIL16GROUP_STORE_IMPLINS0_9SUB_GROUPEILM2EPU3AS1INS3_10PROPERTIESINS4_20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS4_9NAIVE_KEYEJEEENSB_INS4_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_SS__EXIT_I_I:%.*]] +// CHECK-GLOBAL: for.body.i.i.i: +// CHECK-GLOBAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 +// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] +// CHECK-GLOBAL-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-GLOBAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul nuw nsw i32 [[TMP4]], [[I_0_I_I_I]] +// CHECK-GLOBAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[TMP3]], [[MUL_I_I_I_I]] +// CHECK-GLOBAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 +// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[P]], i64 [[IDXPROM_I_I_I]] +// 
CHECK-GLOBAL-NEXT: store i32 [[TMP5]], ptr addrspace(1) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-GLOBAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 +// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP81:![0-9]+]] +// CHECK-GLOBAL: _ZN4sycl3_V13ext6oneapi12experimental6detail16group_store_implINS0_9sub_groupEiLm2EPU3AS1iNS3_10propertiesINS4_20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_14full_group_keyEJEEENSB_INS4_9naive_keyEJEEENSB_INS4_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_NS0_4spanISP_XT1_EEESQ_SS_.exit.i.i: +// CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] +// CHECK-GLOBAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM2EPU3AS1INS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESP_NS0_4SPANISN_XT1_EEESO_SQ__EXIT:%.*]] +// CHECK-GLOBAL: if.end.i.i: +// CHECK-GLOBAL-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[VALUES_I_I]]) #[[ATTR9]] // CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I:%.*]] // CHECK-GLOBAL: for.cond.i.i: -// CHECK-GLOBAL-NEXT: [[I_0_I_I:%.*]] = phi i32 [ 0, [[IF_THEN_I]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] -// CHECK-GLOBAL-NEXT: [[CMP_I19_I:%.*]] = icmp samesign ult i32 [[I_0_I_I]], 2 -// CHECK-GLOBAL-NEXT: br i1 [[CMP_I19_I]], label [[FOR_BODY_I_I]], label 
[[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM2EPU3AS1INS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_9NAIVE_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_SS__EXIT_I:%.*]] +// CHECK-GLOBAL-NEXT: [[I_0_I_I:%.*]] = phi i32 [ 0, [[IF_END_I_I]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] +// CHECK-GLOBAL-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I]], 2 +// CHECK-GLOBAL-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[FOR_COND_CLEANUP_I_I:%.*]] +// CHECK-GLOBAL: for.cond.cleanup.i.i: +// CHECK-GLOBAL-NEXT: [[TMP6:%.*]] = load <2 x i32>, ptr [[VALUES_I_I]], align 4, !tbaa [[TBAA34]] +// CHECK-GLOBAL-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELIDv2_jEvPU3AS1jT_(ptr addrspace(1) noundef nonnull [[P]], <2 x i32> noundef [[TMP6]]) #[[ATTR7]] +// CHECK-GLOBAL-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VALUES_I_I]]) #[[ATTR9]] +// CHECK-GLOBAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM2EPU3AS1INS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESP_NS0_4SPANISN_XT1_EEESO_SQ__EXIT]] // CHECK-GLOBAL: for.body.i.i: // CHECK-GLOBAL-NEXT: [[CONV_I_I:%.*]] = zext nneg i32 [[I_0_I_I]] to i64 -// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I]] -// CHECK-GLOBAL-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I_I]], align 4, 
!tbaa [[TBAA8]] -// CHECK-GLOBAL-NEXT: [[MUL_I_I_I:%.*]] = mul nuw nsw i32 [[TMP4]], [[I_0_I_I]] -// CHECK-GLOBAL-NEXT: [[ADD_I_I_I:%.*]] = add i32 [[TMP3]], [[MUL_I_I_I]] -// CHECK-GLOBAL-NEXT: [[IDXPROM_I_I:%.*]] = sext i32 [[ADD_I_I_I]] to i64 -// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[P]], i64 [[IDXPROM_I_I]] -// CHECK-GLOBAL-NEXT: store i32 [[TMP5]], ptr addrspace(1) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I21_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I]] +// CHECK-GLOBAL-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I21_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds [2 x i32], ptr [[VALUES_I_I]], i64 0, i64 [[CONV_I_I]] +// CHECK-GLOBAL-NEXT: store i32 [[TMP7]], ptr [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] // CHECK-GLOBAL-NEXT: [[INC_I_I]] = add nuw nsw i32 [[I_0_I_I]], 1 -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP81:![0-9]+]] -// CHECK-GLOBAL: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm2EPU3AS1iNS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_14full_group_keyEJEEENSB_INS9_9naive_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_NS0_4spanISP_XT1_EEESQ_SS_.exit.i: -// CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] -// CHECK-GLOBAL-NEXT: br label 
[[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM2EPU3AS1INS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESP_NS0_4SPANISN_XT1_EEESO_SQ__EXIT:%.*]] -// CHECK-GLOBAL: if.end.i: -// CHECK-GLOBAL-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[VALUES_I]]) #[[ATTR9]] -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I:%.*]] -// CHECK-GLOBAL: for.cond.i: -// CHECK-GLOBAL-NEXT: [[I_0_I:%.*]] = phi i32 [ 0, [[IF_END_I]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] -// CHECK-GLOBAL-NEXT: [[CMP_I:%.*]] = icmp samesign ult i32 [[I_0_I]], 2 -// CHECK-GLOBAL-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[FOR_COND_CLEANUP_I:%.*]] -// CHECK-GLOBAL: for.cond.cleanup.i: -// CHECK-GLOBAL-NEXT: [[TMP6:%.*]] = load <2 x i32>, ptr [[VALUES_I]], align 4, !tbaa [[TBAA34]] -// CHECK-GLOBAL-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELIDv2_jEvPU3AS1jT_(ptr addrspace(1) noundef nonnull [[P]], <2 x i32> noundef [[TMP6]]) #[[ATTR7]] -// CHECK-GLOBAL-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VALUES_I]]) #[[ATTR9]] -// CHECK-GLOBAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM2EPU3AS1INS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESP_NS0_4SPANISN_XT1_EEESO_SQ__EXIT]] -// CHECK-GLOBAL: for.body.i: -// CHECK-GLOBAL-NEXT: [[CONV_I:%.*]] = zext nneg i32 [[I_0_I]] to i64 -// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I20_I:%.*]] = getelementptr 
inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I]] -// CHECK-GLOBAL-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I20_I]], align 4, !tbaa [[TBAA8]] -// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x i32], ptr [[VALUES_I]], i64 0, i64 [[CONV_I]] -// CHECK-GLOBAL-NEXT: store i32 [[TMP7]], ptr [[ARRAYIDX_I]], align 4, !tbaa [[TBAA8]] -// CHECK-GLOBAL-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1 -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP82:![0-9]+]] +// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP82:![0-9]+]] // CHECK-GLOBAL: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm2EPU3AS1iNS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESP_NS0_4spanISN_XT1_EEESO_SQ_.exit: // CHECK-GLOBAL-NEXT: ret void // // CHECK-LOCAL-LABEL: @_ZN7striped27test_contiguous_auto_detectERN4sycl3_V19sub_groupENS1_4spanIiLm2EEEPU3AS3i( // CHECK-LOCAL-NEXT: entry: -// CHECK-LOCAL-NEXT: [[VALUES_I:%.*]] = alloca [2 x i32], align 4 +// CHECK-LOCAL-NEXT: [[VALUES_I_I:%.*]] = alloca [2 x i32], align 4 // CHECK-LOCAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[V:%.*]], align 8, !tbaa [[TBAA15]] // CHECK-LOCAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) -// CHECK-LOCAL-NEXT: [[CMP_I_I:%.*]] = icmp ne ptr addrspace(3) [[P:%.*]], null -// CHECK-LOCAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I]]) +// CHECK-LOCAL-NEXT: [[CMP_I_I_I:%.*]] = icmp ne ptr addrspace(3) [[P:%.*]], null +// CHECK-LOCAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I_I]]) // CHECK-LOCAL-NEXT: [[TMP2:%.*]] = ptrtoint ptr addrspace(3) [[P]] to i64 -// CHECK-LOCAL-NEXT: [[REM_I_I:%.*]] = and i64 [[TMP2]], 15 -// CHECK-LOCAL-NEXT: 
[[CMP1_I_NOT_I:%.*]] = icmp eq i64 [[REM_I_I]], 0 -// CHECK-LOCAL-NEXT: br i1 [[CMP1_I_NOT_I]], label [[IF_END_I:%.*]], label [[IF_THEN_I:%.*]] -// CHECK-LOCAL: if.then.i: +// CHECK-LOCAL-NEXT: [[REM_I_I_I:%.*]] = and i64 [[TMP2]], 15 +// CHECK-LOCAL-NEXT: [[CMP1_I_NOT_I_I:%.*]] = icmp eq i64 [[REM_I_I_I]], 0 +// CHECK-LOCAL-NEXT: br i1 [[CMP1_I_NOT_I_I]], label [[IF_END_I_I:%.*]], label [[IF_THEN_I_I:%.*]] +// CHECK-LOCAL: if.then.i.i: // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // CHECK-LOCAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META75:![0-9]+]] // CHECK-LOCAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META78:![0-9]+]] +// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] +// CHECK-LOCAL: for.cond.i.i.i: +// CHECK-LOCAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[IF_THEN_I_I]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] +// CHECK-LOCAL-NEXT: [[CMP_I20_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I_I]], 2 +// CHECK-LOCAL-NEXT: br i1 [[CMP_I20_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL6DETAIL16GROUP_STORE_IMPLINS0_9SUB_GROUPEILM2EPU3AS3INS3_10PROPERTIESINS4_20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS4_9NAIVE_KEYEJEEENSB_INS4_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_SS__EXIT_I_I:%.*]] +// CHECK-LOCAL: for.body.i.i.i: +// CHECK-LOCAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 +// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] +// CHECK-LOCAL-NEXT: 
[[TMP5:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-LOCAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul nuw nsw i32 [[TMP4]], [[I_0_I_I_I]] +// CHECK-LOCAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[TMP3]], [[MUL_I_I_I_I]] +// CHECK-LOCAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 +// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[P]], i64 [[IDXPROM_I_I_I]] +// CHECK-LOCAL-NEXT: store i32 [[TMP5]], ptr addrspace(3) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-LOCAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 +// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP81:![0-9]+]] +// CHECK-LOCAL: _ZN4sycl3_V13ext6oneapi12experimental6detail16group_store_implINS0_9sub_groupEiLm2EPU3AS3iNS3_10propertiesINS4_20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_14full_group_keyEJEEENSB_INS4_9naive_keyEJEEENSB_INS4_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_NS0_4spanISP_XT1_EEESQ_SS_.exit.i.i: +// CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] +// CHECK-LOCAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM2EPU3AS3INS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESP_NS0_4SPANISN_XT1_EEESO_SQ__EXIT:%.*]] +// CHECK-LOCAL: if.end.i.i: +// CHECK-LOCAL-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[VALUES_I_I]]) #[[ATTR9]] // CHECK-LOCAL-NEXT: br label 
[[FOR_COND_I_I:%.*]] // CHECK-LOCAL: for.cond.i.i: -// CHECK-LOCAL-NEXT: [[I_0_I_I:%.*]] = phi i32 [ 0, [[IF_THEN_I]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] -// CHECK-LOCAL-NEXT: [[CMP_I19_I:%.*]] = icmp samesign ult i32 [[I_0_I_I]], 2 -// CHECK-LOCAL-NEXT: br i1 [[CMP_I19_I]], label [[FOR_BODY_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM2EPU3AS3INS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_9NAIVE_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_SS__EXIT_I:%.*]] +// CHECK-LOCAL-NEXT: [[I_0_I_I:%.*]] = phi i32 [ 0, [[IF_END_I_I]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] +// CHECK-LOCAL-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I]], 2 +// CHECK-LOCAL-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[FOR_COND_CLEANUP_I_I:%.*]] +// CHECK-LOCAL: for.cond.cleanup.i.i: +// CHECK-LOCAL-NEXT: [[TMP6:%.*]] = load <2 x i32>, ptr [[VALUES_I_I]], align 4, !tbaa [[TBAA34]] +// CHECK-LOCAL-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELIDv2_jEvPU3AS3jT_(ptr addrspace(3) noundef nonnull [[P]], <2 x i32> noundef [[TMP6]]) #[[ATTR7]] +// CHECK-LOCAL-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VALUES_I_I]]) #[[ATTR9]] +// CHECK-LOCAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM2EPU3AS3INS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESP_NS0_4SPANISN_XT1_EEESO_SQ__EXIT]] // CHECK-LOCAL: 
for.body.i.i: // CHECK-LOCAL-NEXT: [[CONV_I_I:%.*]] = zext nneg i32 [[I_0_I_I]] to i64 -// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I]] -// CHECK-LOCAL-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-LOCAL-NEXT: [[MUL_I_I_I:%.*]] = mul nuw nsw i32 [[TMP4]], [[I_0_I_I]] -// CHECK-LOCAL-NEXT: [[ADD_I_I_I:%.*]] = add i32 [[TMP3]], [[MUL_I_I_I]] -// CHECK-LOCAL-NEXT: [[IDXPROM_I_I:%.*]] = sext i32 [[ADD_I_I_I]] to i64 -// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[P]], i64 [[IDXPROM_I_I]] -// CHECK-LOCAL-NEXT: store i32 [[TMP5]], ptr addrspace(3) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-LOCAL-NEXT: [[ARRAYIDX_I21_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I]] +// CHECK-LOCAL-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I21_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds [2 x i32], ptr [[VALUES_I_I]], i64 0, i64 [[CONV_I_I]] +// CHECK-LOCAL-NEXT: store i32 [[TMP7]], ptr [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] // CHECK-LOCAL-NEXT: [[INC_I_I]] = add nuw nsw i32 [[I_0_I_I]], 1 -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP81:![0-9]+]] -// CHECK-LOCAL: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm2EPU3AS3iNS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_14full_group_keyEJEEENSB_INS9_9naive_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_NS0_4spanISP_XT1_EEESQ_SS_.exit.i: -// CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] -// 
CHECK-LOCAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM2EPU3AS3INS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESP_NS0_4SPANISN_XT1_EEESO_SQ__EXIT:%.*]] -// CHECK-LOCAL: if.end.i: -// CHECK-LOCAL-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[VALUES_I]]) #[[ATTR9]] -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I:%.*]] -// CHECK-LOCAL: for.cond.i: -// CHECK-LOCAL-NEXT: [[I_0_I:%.*]] = phi i32 [ 0, [[IF_END_I]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] -// CHECK-LOCAL-NEXT: [[CMP_I:%.*]] = icmp samesign ult i32 [[I_0_I]], 2 -// CHECK-LOCAL-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[FOR_COND_CLEANUP_I:%.*]] -// CHECK-LOCAL: for.cond.cleanup.i: -// CHECK-LOCAL-NEXT: [[TMP6:%.*]] = load <2 x i32>, ptr [[VALUES_I]], align 4, !tbaa [[TBAA34]] -// CHECK-LOCAL-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELIDv2_jEvPU3AS3jT_(ptr addrspace(3) noundef nonnull [[P]], <2 x i32> noundef [[TMP6]]) #[[ATTR7]] -// CHECK-LOCAL-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VALUES_I]]) #[[ATTR9]] -// CHECK-LOCAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM2EPU3AS3INS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESP_NS0_4SPANISN_XT1_EEESO_SQ__EXIT]] -// CHECK-LOCAL: for.body.i: -// CHECK-LOCAL-NEXT: [[CONV_I:%.*]] = zext nneg i32 [[I_0_I]] to i64 -// CHECK-LOCAL-NEXT: [[ARRAYIDX_I20_I:%.*]] = 
getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I]] -// CHECK-LOCAL-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I20_I]], align 4, !tbaa [[TBAA8]] -// CHECK-LOCAL-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x i32], ptr [[VALUES_I]], i64 0, i64 [[CONV_I]] -// CHECK-LOCAL-NEXT: store i32 [[TMP7]], ptr [[ARRAYIDX_I]], align 4, !tbaa [[TBAA8]] -// CHECK-LOCAL-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1 -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP82:![0-9]+]] +// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP82:![0-9]+]] // CHECK-LOCAL: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm2EPU3AS3iNS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESP_NS0_4spanISN_XT1_EEESO_SQ_.exit: // CHECK-LOCAL-NEXT: ret void // @@ -995,22 +995,22 @@ using accessor_iter_t = accessor v, // CHECK-GLOBAL-LABEL: @_ZN7striped34test_accessor_iter_force_optimizedERN4sycl3_V19sub_groupENS1_4spanIiLm2EEERNS1_6detail17accessor_iteratorIiLi1EEE( // CHECK-GLOBAL-NEXT: entry: -// CHECK-GLOBAL-NEXT: [[AGG_TMP:%.*]] = alloca %"struct.sycl::_V1::sub_group", align 1 -// CHECK-GLOBAL-NEXT: [[AGG_TMP1:%.*]] = alloca %"class.sycl::_V1::span.22", align 8 -// CHECK-GLOBAL-NEXT: [[AGG_TMP2:%.*]] = alloca %"class.sycl::_V1::detail::accessor_iterator", align 8 -// CHECK-GLOBAL-NEXT: [[AGG_TMP3:%.*]] = alloca %"class.sycl::_V1::ext::oneapi::experimental::properties.28", align 1 +// CHECK-GLOBAL-NEXT: [[AGG_TMP_I:%.*]] = alloca %"struct.sycl::_V1::sub_group", align 1 +// CHECK-GLOBAL-NEXT: [[AGG_TMP1_I:%.*]] = alloca %"class.sycl::_V1::span.22", align 8 +// CHECK-GLOBAL-NEXT: [[AGG_TMP3_I:%.*]] = alloca 
%"class.sycl::_V1::ext::oneapi::experimental::properties.28", align 1 +// CHECK-GLOBAL-NEXT: [[AGG_TMP26:%.*]] = alloca %"class.sycl::_V1::detail::accessor_iterator", align 8 // CHECK-GLOBAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[V:%.*]], align 8, !tbaa [[TBAA15]] -// CHECK-GLOBAL-NEXT: store i64 [[TMP0]], ptr [[AGG_TMP1]], align 8, !tbaa [[TBAA15]] -// CHECK-GLOBAL-NEXT: call void @llvm.memcpy.p0.p4.i64(ptr align 8 [[AGG_TMP2]], ptr addrspace(4) align 8 [[ITER:%.*]], i64 80, i1 false), !tbaa.struct [[TBAA_STRUCT92:![0-9]+]] -// CHECK-GLOBAL-NEXT: tail call spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm2ENS0_6detail17accessor_iteratorIiLi1EEENS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSC_INS3_21contiguous_memory_keyEJEEENSC_INS3_14full_group_keyEJEEENSC_INSA_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESS_NS0_4spanISQ_XT1_EEESR_ST_(ptr noundef nonnull byval(%"struct.sycl::_V1::sub_group") align 1 [[AGG_TMP]], ptr noundef nonnull byval(%"class.sycl::_V1::span.22") align 8 [[AGG_TMP1]], ptr noundef nonnull byval(%"class.sycl::_V1::detail::accessor_iterator") align 8 [[AGG_TMP2]], ptr noundef nonnull byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.28") align 1 [[AGG_TMP3]]) #[[ATTR7]] +// CHECK-GLOBAL-NEXT: call void @llvm.lifetime.start.p0(i64 80, ptr nonnull [[AGG_TMP26]]) +// CHECK-GLOBAL-NEXT: call void @llvm.lifetime.start.p0(i64 1, ptr nonnull [[AGG_TMP_I]]) +// CHECK-GLOBAL-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[AGG_TMP1_I]]) +// CHECK-GLOBAL-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[AGG_TMP3_I]]) +// CHECK-GLOBAL-NEXT: call void @llvm.memcpy.p0.p4.i64(ptr noundef nonnull align 8 dereferenceable(80) [[AGG_TMP26]], ptr addrspace(4) noundef align 8 dereferenceable(80) 
[[ITER:%.*]], i64 80, i1 false) +// CHECK-GLOBAL-NEXT: store i64 [[TMP0]], ptr [[AGG_TMP1_I]], align 8, !tbaa [[TBAA15]] +// CHECK-GLOBAL-NEXT: tail call spir_func void @_ZN4sycl3_V13ext6oneapi12experimental6detail16group_store_implINS0_9sub_groupEiLm2ENS0_6detail17accessor_iteratorIiLi1EEENS3_10propertiesINS4_20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSC_INS3_21contiguous_memory_keyEJEEENSC_INS3_14full_group_keyEJEEENSC_INS4_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESS_NS0_4spanISQ_XT1_EEESR_ST_(ptr noundef nonnull byval(%"struct.sycl::_V1::sub_group") align 1 [[AGG_TMP_I]], ptr noundef nonnull byval(%"class.sycl::_V1::span.22") align 8 [[AGG_TMP1_I]], ptr noundef nonnull byval(%"class.sycl::_V1::detail::accessor_iterator") align 8 [[AGG_TMP26]], ptr noundef nonnull byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.28") align 1 [[AGG_TMP3_I]]) #[[ATTR7]] +// CHECK-GLOBAL-NEXT: call void @llvm.lifetime.end.p0(i64 80, ptr nonnull [[AGG_TMP26]]) +// CHECK-GLOBAL-NEXT: call void @llvm.lifetime.end.p0(i64 1, ptr nonnull [[AGG_TMP_I]]) +// CHECK-GLOBAL-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[AGG_TMP1_I]]) +// CHECK-GLOBAL-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[AGG_TMP3_I]]) // CHECK-GLOBAL-NEXT: ret void // // CHECK-LOCAL-LABEL: @_ZN7striped34test_accessor_iter_force_optimizedERN4sycl3_V19sub_groupENS1_4spanIiLm2EEERNS1_6detail17accessor_iteratorIiLi1EEE( // CHECK-LOCAL-NEXT: entry: -// CHECK-LOCAL-NEXT: [[AGG_TMP:%.*]] = alloca %"struct.sycl::_V1::sub_group", align 1 -// CHECK-LOCAL-NEXT: [[AGG_TMP1:%.*]] = alloca %"class.sycl::_V1::span.22", align 8 -// CHECK-LOCAL-NEXT: [[AGG_TMP2:%.*]] = alloca %"class.sycl::_V1::detail::accessor_iterator", align 8 -// CHECK-LOCAL-NEXT: [[AGG_TMP3:%.*]] = alloca 
%"class.sycl::_V1::ext::oneapi::experimental::properties.28", align 1 +// CHECK-LOCAL-NEXT: [[AGG_TMP_I:%.*]] = alloca %"struct.sycl::_V1::sub_group", align 1 +// CHECK-LOCAL-NEXT: [[AGG_TMP1_I:%.*]] = alloca %"class.sycl::_V1::span.22", align 8 +// CHECK-LOCAL-NEXT: [[AGG_TMP3_I:%.*]] = alloca %"class.sycl::_V1::ext::oneapi::experimental::properties.28", align 1 +// CHECK-LOCAL-NEXT: [[AGG_TMP26:%.*]] = alloca %"class.sycl::_V1::detail::accessor_iterator", align 8 // CHECK-LOCAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[V:%.*]], align 8, !tbaa [[TBAA15]] -// CHECK-LOCAL-NEXT: store i64 [[TMP0]], ptr [[AGG_TMP1]], align 8, !tbaa [[TBAA15]] -// CHECK-LOCAL-NEXT: call void @llvm.memcpy.p0.p4.i64(ptr align 8 [[AGG_TMP2]], ptr addrspace(4) align 8 [[ITER:%.*]], i64 80, i1 false), !tbaa.struct [[TBAA_STRUCT92:![0-9]+]] -// CHECK-LOCAL-NEXT: tail call spir_func void @_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm2ENS0_6detail17accessor_iteratorIiLi1EEENS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSC_INS3_21contiguous_memory_keyEJEEENSC_INS3_14full_group_keyEJEEENSC_INSA_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESS_NS0_4spanISQ_XT1_EEESR_ST_(ptr noundef nonnull byval(%"struct.sycl::_V1::sub_group") align 1 [[AGG_TMP]], ptr noundef nonnull byval(%"class.sycl::_V1::span.22") align 8 [[AGG_TMP1]], ptr noundef nonnull byval(%"class.sycl::_V1::detail::accessor_iterator") align 8 [[AGG_TMP2]], ptr noundef nonnull byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.28") align 1 [[AGG_TMP3]]) #[[ATTR7]] +// CHECK-LOCAL-NEXT: call void @llvm.lifetime.start.p0(i64 80, ptr nonnull [[AGG_TMP26]]) +// CHECK-LOCAL-NEXT: call void @llvm.lifetime.start.p0(i64 1, ptr nonnull [[AGG_TMP_I]]) +// CHECK-LOCAL-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr 
nonnull [[AGG_TMP1_I]]) +// CHECK-LOCAL-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[AGG_TMP3_I]]) +// CHECK-LOCAL-NEXT: call void @llvm.memcpy.p0.p4.i64(ptr noundef nonnull align 8 dereferenceable(80) [[AGG_TMP26]], ptr addrspace(4) noundef align 8 dereferenceable(80) [[ITER:%.*]], i64 80, i1 false) +// CHECK-LOCAL-NEXT: store i64 [[TMP0]], ptr [[AGG_TMP1_I]], align 8, !tbaa [[TBAA15]] +// CHECK-LOCAL-NEXT: tail call spir_func void @_ZN4sycl3_V13ext6oneapi12experimental6detail16group_store_implINS0_9sub_groupEiLm2ENS0_6detail17accessor_iteratorIiLi1EEENS3_10propertiesINS4_20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSC_INS3_21contiguous_memory_keyEJEEENSC_INS3_14full_group_keyEJEEENSC_INS4_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESS_NS0_4spanISQ_XT1_EEESR_ST_(ptr noundef nonnull byval(%"struct.sycl::_V1::sub_group") align 1 [[AGG_TMP_I]], ptr noundef nonnull byval(%"class.sycl::_V1::span.22") align 8 [[AGG_TMP1_I]], ptr noundef nonnull byval(%"class.sycl::_V1::detail::accessor_iterator") align 8 [[AGG_TMP26]], ptr noundef nonnull byval(%"class.sycl::_V1::ext::oneapi::experimental::properties.28") align 1 [[AGG_TMP3_I]]) #[[ATTR7]] +// CHECK-LOCAL-NEXT: call void @llvm.lifetime.end.p0(i64 80, ptr nonnull [[AGG_TMP26]]) +// CHECK-LOCAL-NEXT: call void @llvm.lifetime.end.p0(i64 1, ptr nonnull [[AGG_TMP_I]]) +// CHECK-LOCAL-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[AGG_TMP1_I]]) +// CHECK-LOCAL-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[AGG_TMP3_I]]) // CHECK-LOCAL-NEXT: ret void // SYCL_EXTERNAL void test_accessor_iter_force_optimized(sycl::sub_group &sg, @@ -1085,115 +1101,115 @@ SYCL_EXTERNAL void test_accessor_iter_force_optimized(sycl::sub_group &sg, // CHECK-GLOBAL-LABEL: 
@_ZN7striped16test_four_shortsERN4sycl3_V19sub_groupENS1_4spanIsLm4EEEPU3AS1s( // CHECK-GLOBAL-NEXT: entry: -// CHECK-GLOBAL-NEXT: [[VALUES_I:%.*]] = alloca [4 x i16], align 2 +// CHECK-GLOBAL-NEXT: [[VALUES_I_I:%.*]] = alloca [4 x i16], align 2 // CHECK-GLOBAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[V:%.*]], align 8, !tbaa [[TBAA25]] // CHECK-GLOBAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) -// CHECK-GLOBAL-NEXT: [[CMP_I_I:%.*]] = icmp ne ptr addrspace(1) [[P:%.*]], null -// CHECK-GLOBAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I]]) +// CHECK-GLOBAL-NEXT: [[CMP_I_I_I:%.*]] = icmp ne ptr addrspace(1) [[P:%.*]], null +// CHECK-GLOBAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I_I]]) // CHECK-GLOBAL-NEXT: [[TMP2:%.*]] = ptrtoint ptr addrspace(1) [[P]] to i64 -// CHECK-GLOBAL-NEXT: [[REM_I_I:%.*]] = and i64 [[TMP2]], 15 -// CHECK-GLOBAL-NEXT: [[CMP1_I_NOT_I:%.*]] = icmp eq i64 [[REM_I_I]], 0 -// CHECK-GLOBAL-NEXT: br i1 [[CMP1_I_NOT_I]], label [[IF_END_I:%.*]], label [[IF_THEN_I:%.*]] -// CHECK-GLOBAL: if.then.i: +// CHECK-GLOBAL-NEXT: [[REM_I_I_I:%.*]] = and i64 [[TMP2]], 15 +// CHECK-GLOBAL-NEXT: [[CMP1_I_NOT_I_I:%.*]] = icmp eq i64 [[REM_I_I_I]], 0 +// CHECK-GLOBAL-NEXT: br i1 [[CMP1_I_NOT_I_I]], label [[IF_END_I_I:%.*]], label [[IF_THEN_I_I:%.*]] +// CHECK-GLOBAL: if.then.i.i: +// CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] +// CHECK-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META109:![0-9]+]] +// CHECK-GLOBAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META112:![0-9]+]] +// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] +// CHECK-GLOBAL: for.cond.i.i.i: +// CHECK-GLOBAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[IF_THEN_I_I]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] +// 
CHECK-GLOBAL-NEXT: [[CMP_I20_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I_I]], 4 +// CHECK-GLOBAL-NEXT: br i1 [[CMP_I20_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL6DETAIL16GROUP_STORE_IMPLINS0_9SUB_GROUPESLM4EPU3AS1SNS3_10PROPERTIESINS4_20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS4_9NAIVE_KEYEJEEENSB_INS4_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEEST_NS0_4SPANISR_XT1_EEESS_SU__EXIT_I_I:%.*]] +// CHECK-GLOBAL: for.body.i.i.i: +// CHECK-GLOBAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 +// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i16, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] +// CHECK-GLOBAL-NEXT: [[TMP5:%.*]] = load i16, ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 2, !tbaa [[TBAA30]] +// CHECK-GLOBAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul i32 [[TMP4]], [[I_0_I_I_I]] +// CHECK-GLOBAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[TMP3]], [[MUL_I_I_I_I]] +// CHECK-GLOBAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 +// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[P]], i64 [[IDXPROM_I_I_I]] +// CHECK-GLOBAL-NEXT: store i16 [[TMP5]], ptr addrspace(1) [[ARRAYIDX_I_I_I]], align 2, !tbaa [[TBAA30]] +// CHECK-GLOBAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 +// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP115:![0-9]+]] +// CHECK-GLOBAL: 
_ZN4sycl3_V13ext6oneapi12experimental6detail16group_store_implINS0_9sub_groupEsLm4EPU3AS1sNS3_10propertiesINS4_20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS4_9naive_keyEJEEENSB_INS4_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeEST_NS0_4spanISR_XT1_EEESS_SU_.exit.i.i: // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] -// CHECK-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META112:![0-9]+]] -// CHECK-GLOBAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META115:![0-9]+]] +// CHECK-GLOBAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPESLM4EPU3AS1SNS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_SS__EXIT:%.*]] +// CHECK-GLOBAL: if.end.i.i: +// CHECK-GLOBAL-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[VALUES_I_I]]) #[[ATTR9]] // CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I:%.*]] // CHECK-GLOBAL: for.cond.i.i: -// CHECK-GLOBAL-NEXT: [[I_0_I_I:%.*]] = phi i32 [ 0, [[IF_THEN_I]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] -// CHECK-GLOBAL-NEXT: [[CMP_I19_I:%.*]] = icmp samesign ult i32 [[I_0_I_I]], 4 -// CHECK-GLOBAL-NEXT: br i1 [[CMP_I19_I]], label [[FOR_BODY_I_I]], label 
[[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPESLM4EPU3AS1SNS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_9NAIVE_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEEST_NS0_4SPANISR_XT1_EEESS_SU__EXIT_I:%.*]] +// CHECK-GLOBAL-NEXT: [[I_0_I_I:%.*]] = phi i32 [ 0, [[IF_END_I_I]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] +// CHECK-GLOBAL-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I]], 4 +// CHECK-GLOBAL-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[FOR_COND_CLEANUP_I_I:%.*]] +// CHECK-GLOBAL: for.cond.cleanup.i.i: +// CHECK-GLOBAL-NEXT: [[TMP6:%.*]] = load <4 x i16>, ptr [[VALUES_I_I]], align 2, !tbaa [[TBAA34]] +// CHECK-GLOBAL-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELIDv4_tEvPU3AS1tT_(ptr addrspace(1) noundef nonnull [[P]], <4 x i16> noundef [[TMP6]]) #[[ATTR7]] +// CHECK-GLOBAL-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VALUES_I_I]]) #[[ATTR9]] +// CHECK-GLOBAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPESLM4EPU3AS1SNS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_SS__EXIT]] // CHECK-GLOBAL: for.body.i.i: // CHECK-GLOBAL-NEXT: [[CONV_I_I:%.*]] = zext nneg i32 [[I_0_I_I]] to i64 -// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds nuw i16, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I]] -// 
CHECK-GLOBAL-NEXT: [[TMP5:%.*]] = load i16, ptr addrspace(4) [[ARRAYIDX_I_I_I]], align 2, !tbaa [[TBAA30]] -// CHECK-GLOBAL-NEXT: [[MUL_I_I_I:%.*]] = mul i32 [[TMP4]], [[I_0_I_I]] -// CHECK-GLOBAL-NEXT: [[ADD_I_I_I:%.*]] = add i32 [[TMP3]], [[MUL_I_I_I]] -// CHECK-GLOBAL-NEXT: [[IDXPROM_I_I:%.*]] = sext i32 [[ADD_I_I_I]] to i64 -// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[P]], i64 [[IDXPROM_I_I]] -// CHECK-GLOBAL-NEXT: store i16 [[TMP5]], ptr addrspace(1) [[ARRAYIDX_I_I]], align 2, !tbaa [[TBAA30]] +// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I21_I_I:%.*]] = getelementptr inbounds nuw i16, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I]] +// CHECK-GLOBAL-NEXT: [[TMP7:%.*]] = load i16, ptr addrspace(4) [[ARRAYIDX_I21_I_I]], align 2, !tbaa [[TBAA30]] +// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds [4 x i16], ptr [[VALUES_I_I]], i64 0, i64 [[CONV_I_I]] +// CHECK-GLOBAL-NEXT: store i16 [[TMP7]], ptr [[ARRAYIDX_I_I]], align 2, !tbaa [[TBAA30]] // CHECK-GLOBAL-NEXT: [[INC_I_I]] = add nuw nsw i32 [[I_0_I_I]], 1 -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP118:![0-9]+]] -// CHECK-GLOBAL: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEsLm4EPU3AS1sNS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_9naive_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeEST_NS0_4spanISR_XT1_EEESS_SU_.exit.i: -// CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] -// CHECK-GLOBAL-NEXT: br label 
[[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPESLM4EPU3AS1SNS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_SS__EXIT:%.*]] -// CHECK-GLOBAL: if.end.i: -// CHECK-GLOBAL-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[VALUES_I]]) #[[ATTR9]] -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I:%.*]] -// CHECK-GLOBAL: for.cond.i: -// CHECK-GLOBAL-NEXT: [[I_0_I:%.*]] = phi i32 [ 0, [[IF_END_I]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] -// CHECK-GLOBAL-NEXT: [[CMP_I:%.*]] = icmp samesign ult i32 [[I_0_I]], 4 -// CHECK-GLOBAL-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[FOR_COND_CLEANUP_I:%.*]] -// CHECK-GLOBAL: for.cond.cleanup.i: -// CHECK-GLOBAL-NEXT: [[TMP6:%.*]] = load <4 x i16>, ptr [[VALUES_I]], align 2, !tbaa [[TBAA34]] -// CHECK-GLOBAL-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELIDv4_tEvPU3AS1tT_(ptr addrspace(1) noundef nonnull [[P]], <4 x i16> noundef [[TMP6]]) #[[ATTR7]] -// CHECK-GLOBAL-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VALUES_I]]) #[[ATTR9]] -// CHECK-GLOBAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPESLM4EPU3AS1SNS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_SS__EXIT]] -// CHECK-GLOBAL: for.body.i: -// CHECK-GLOBAL-NEXT: [[CONV_I:%.*]] = zext nneg i32 
[[I_0_I]] to i64 -// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I20_I:%.*]] = getelementptr inbounds nuw i16, ptr addrspace(4) [[TMP1]], i64 [[CONV_I]] -// CHECK-GLOBAL-NEXT: [[TMP7:%.*]] = load i16, ptr addrspace(4) [[ARRAYIDX_I20_I]], align 2, !tbaa [[TBAA30]] -// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x i16], ptr [[VALUES_I]], i64 0, i64 [[CONV_I]] -// CHECK-GLOBAL-NEXT: store i16 [[TMP7]], ptr [[ARRAYIDX_I]], align 2, !tbaa [[TBAA30]] -// CHECK-GLOBAL-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1 -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP119:![0-9]+]] +// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP116:![0-9]+]] // CHECK-GLOBAL: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEsLm4EPU3AS1sNS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_NS0_4spanISP_XT1_EEESQ_SS_.exit: // CHECK-GLOBAL-NEXT: ret void // // CHECK-LOCAL-LABEL: @_ZN7striped16test_four_shortsERN4sycl3_V19sub_groupENS1_4spanIsLm4EEEPU3AS3s( // CHECK-LOCAL-NEXT: entry: -// CHECK-LOCAL-NEXT: [[VALUES_I:%.*]] = alloca [4 x i16], align 2 +// CHECK-LOCAL-NEXT: [[VALUES_I_I:%.*]] = alloca [4 x i16], align 2 // CHECK-LOCAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[V:%.*]], align 8, !tbaa [[TBAA25]] // CHECK-LOCAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) -// CHECK-LOCAL-NEXT: [[CMP_I_I:%.*]] = icmp ne ptr addrspace(3) [[P:%.*]], null -// CHECK-LOCAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I]]) +// CHECK-LOCAL-NEXT: [[CMP_I_I_I:%.*]] = icmp ne ptr addrspace(3) [[P:%.*]], null +// CHECK-LOCAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I_I]]) // CHECK-LOCAL-NEXT: [[TMP2:%.*]] = ptrtoint ptr 
addrspace(3) [[P]] to i64 -// CHECK-LOCAL-NEXT: [[REM_I_I:%.*]] = and i64 [[TMP2]], 15 -// CHECK-LOCAL-NEXT: [[CMP1_I_NOT_I:%.*]] = icmp eq i64 [[REM_I_I]], 0 -// CHECK-LOCAL-NEXT: br i1 [[CMP1_I_NOT_I]], label [[IF_END_I:%.*]], label [[IF_THEN_I:%.*]] -// CHECK-LOCAL: if.then.i: +// CHECK-LOCAL-NEXT: [[REM_I_I_I:%.*]] = and i64 [[TMP2]], 15 +// CHECK-LOCAL-NEXT: [[CMP1_I_NOT_I_I:%.*]] = icmp eq i64 [[REM_I_I_I]], 0 +// CHECK-LOCAL-NEXT: br i1 [[CMP1_I_NOT_I_I]], label [[IF_END_I_I:%.*]], label [[IF_THEN_I_I:%.*]] +// CHECK-LOCAL: if.then.i.i: +// CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] +// CHECK-LOCAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META109:![0-9]+]] +// CHECK-LOCAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META112:![0-9]+]] +// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] +// CHECK-LOCAL: for.cond.i.i.i: +// CHECK-LOCAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[IF_THEN_I_I]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] +// CHECK-LOCAL-NEXT: [[CMP_I20_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I_I]], 4 +// CHECK-LOCAL-NEXT: br i1 [[CMP_I20_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL6DETAIL16GROUP_STORE_IMPLINS0_9SUB_GROUPESLM4EPU3AS3SNS3_10PROPERTIESINS4_20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS4_9NAIVE_KEYEJEEENSB_INS4_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEEST_NS0_4SPANISR_XT1_EEESS_SU__EXIT_I_I:%.*]] +// CHECK-LOCAL: for.body.i.i.i: +// CHECK-LOCAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 
+// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i16, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] +// CHECK-LOCAL-NEXT: [[TMP5:%.*]] = load i16, ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 2, !tbaa [[TBAA30]] +// CHECK-LOCAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul i32 [[TMP4]], [[I_0_I_I_I]] +// CHECK-LOCAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[TMP3]], [[MUL_I_I_I_I]] +// CHECK-LOCAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 +// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i16, ptr addrspace(3) [[P]], i64 [[IDXPROM_I_I_I]] +// CHECK-LOCAL-NEXT: store i16 [[TMP5]], ptr addrspace(3) [[ARRAYIDX_I_I_I]], align 2, !tbaa [[TBAA30]] +// CHECK-LOCAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 +// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP115:![0-9]+]] +// CHECK-LOCAL: _ZN4sycl3_V13ext6oneapi12experimental6detail16group_store_implINS0_9sub_groupEsLm4EPU3AS3sNS3_10propertiesINS4_20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS4_9naive_keyEJEEENSB_INS4_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeEST_NS0_4spanISR_XT1_EEESS_SU_.exit.i.i: // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] -// CHECK-LOCAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META112:![0-9]+]] -// CHECK-LOCAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META115:![0-9]+]] +// CHECK-LOCAL-NEXT: br label 
[[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPESLM4EPU3AS3SNS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_SS__EXIT:%.*]] +// CHECK-LOCAL: if.end.i.i: +// CHECK-LOCAL-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[VALUES_I_I]]) #[[ATTR9]] // CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I:%.*]] // CHECK-LOCAL: for.cond.i.i: -// CHECK-LOCAL-NEXT: [[I_0_I_I:%.*]] = phi i32 [ 0, [[IF_THEN_I]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] -// CHECK-LOCAL-NEXT: [[CMP_I19_I:%.*]] = icmp samesign ult i32 [[I_0_I_I]], 4 -// CHECK-LOCAL-NEXT: br i1 [[CMP_I19_I]], label [[FOR_BODY_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPESLM4EPU3AS3SNS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_9NAIVE_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEEST_NS0_4SPANISR_XT1_EEESS_SU__EXIT_I:%.*]] +// CHECK-LOCAL-NEXT: [[I_0_I_I:%.*]] = phi i32 [ 0, [[IF_END_I_I]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] +// CHECK-LOCAL-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I]], 4 +// CHECK-LOCAL-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[FOR_COND_CLEANUP_I_I:%.*]] +// CHECK-LOCAL: for.cond.cleanup.i.i: +// CHECK-LOCAL-NEXT: [[TMP6:%.*]] = load <4 x i16>, ptr [[VALUES_I_I]], align 2, !tbaa [[TBAA34]] +// CHECK-LOCAL-NEXT: tail call spir_func void 
@_Z31__spirv_SubgroupBlockWriteINTELIDv4_tEvPU3AS3tT_(ptr addrspace(3) noundef nonnull [[P]], <4 x i16> noundef [[TMP6]]) #[[ATTR7]] +// CHECK-LOCAL-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VALUES_I_I]]) #[[ATTR9]] +// CHECK-LOCAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPESLM4EPU3AS3SNS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_SS__EXIT]] // CHECK-LOCAL: for.body.i.i: // CHECK-LOCAL-NEXT: [[CONV_I_I:%.*]] = zext nneg i32 [[I_0_I_I]] to i64 -// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds nuw i16, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I]] -// CHECK-LOCAL-NEXT: [[TMP5:%.*]] = load i16, ptr addrspace(4) [[ARRAYIDX_I_I_I]], align 2, !tbaa [[TBAA30]] -// CHECK-LOCAL-NEXT: [[MUL_I_I_I:%.*]] = mul i32 [[TMP4]], [[I_0_I_I]] -// CHECK-LOCAL-NEXT: [[ADD_I_I_I:%.*]] = add i32 [[TMP3]], [[MUL_I_I_I]] -// CHECK-LOCAL-NEXT: [[IDXPROM_I_I:%.*]] = sext i32 [[ADD_I_I_I]] to i64 -// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i16, ptr addrspace(3) [[P]], i64 [[IDXPROM_I_I]] -// CHECK-LOCAL-NEXT: store i16 [[TMP5]], ptr addrspace(3) [[ARRAYIDX_I_I]], align 2, !tbaa [[TBAA30]] +// CHECK-LOCAL-NEXT: [[ARRAYIDX_I21_I_I:%.*]] = getelementptr inbounds nuw i16, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I]] +// CHECK-LOCAL-NEXT: [[TMP7:%.*]] = load i16, ptr addrspace(4) [[ARRAYIDX_I21_I_I]], align 2, !tbaa [[TBAA30]] +// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds [4 x i16], ptr [[VALUES_I_I]], i64 0, i64 [[CONV_I_I]] +// CHECK-LOCAL-NEXT: store i16 [[TMP7]], ptr [[ARRAYIDX_I_I]], align 2, !tbaa [[TBAA30]] // CHECK-LOCAL-NEXT: 
[[INC_I_I]] = add nuw nsw i32 [[I_0_I_I]], 1 -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP118:![0-9]+]] -// CHECK-LOCAL: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEsLm4EPU3AS3sNS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_9naive_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeEST_NS0_4spanISR_XT1_EEESS_SU_.exit.i: -// CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] -// CHECK-LOCAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPESLM4EPU3AS3SNS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_SS__EXIT:%.*]] -// CHECK-LOCAL: if.end.i: -// CHECK-LOCAL-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[VALUES_I]]) #[[ATTR9]] -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I:%.*]] -// CHECK-LOCAL: for.cond.i: -// CHECK-LOCAL-NEXT: [[I_0_I:%.*]] = phi i32 [ 0, [[IF_END_I]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] -// CHECK-LOCAL-NEXT: [[CMP_I:%.*]] = icmp samesign ult i32 [[I_0_I]], 4 -// CHECK-LOCAL-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[FOR_COND_CLEANUP_I:%.*]] -// CHECK-LOCAL: for.cond.cleanup.i: -// CHECK-LOCAL-NEXT: [[TMP6:%.*]] = load <4 x i16>, ptr [[VALUES_I]], align 2, !tbaa [[TBAA34]] -// CHECK-LOCAL-NEXT: tail call spir_func void 
@_Z31__spirv_SubgroupBlockWriteINTELIDv4_tEvPU3AS3tT_(ptr addrspace(3) noundef nonnull [[P]], <4 x i16> noundef [[TMP6]]) #[[ATTR7]] -// CHECK-LOCAL-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VALUES_I]]) #[[ATTR9]] -// CHECK-LOCAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPESLM4EPU3AS3SNS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_SS__EXIT]] -// CHECK-LOCAL: for.body.i: -// CHECK-LOCAL-NEXT: [[CONV_I:%.*]] = zext nneg i32 [[I_0_I]] to i64 -// CHECK-LOCAL-NEXT: [[ARRAYIDX_I20_I:%.*]] = getelementptr inbounds nuw i16, ptr addrspace(4) [[TMP1]], i64 [[CONV_I]] -// CHECK-LOCAL-NEXT: [[TMP7:%.*]] = load i16, ptr addrspace(4) [[ARRAYIDX_I20_I]], align 2, !tbaa [[TBAA30]] -// CHECK-LOCAL-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x i16], ptr [[VALUES_I]], i64 0, i64 [[CONV_I]] -// CHECK-LOCAL-NEXT: store i16 [[TMP7]], ptr [[ARRAYIDX_I]], align 2, !tbaa [[TBAA30]] -// CHECK-LOCAL-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1 -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP119:![0-9]+]] +// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP116:![0-9]+]] // CHECK-LOCAL: 
_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEsLm4EPU3AS3sNS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_NS0_4spanISP_XT1_EEESQ_SS_.exit: // CHECK-LOCAL-NEXT: ret void // @@ -1206,115 +1222,115 @@ SYCL_EXTERNAL void test_four_shorts(sycl::sub_group &sg, span v, // CHECK-GLOBAL-LABEL: @_ZN7striped19test_sixteen_shortsERN4sycl3_V19sub_groupENS1_4spanIsLm16EEEPU3AS1s( // CHECK-GLOBAL-NEXT: entry: -// CHECK-GLOBAL-NEXT: [[VALUES_I:%.*]] = alloca [16 x i16], align 2 +// CHECK-GLOBAL-NEXT: [[VALUES_I_I:%.*]] = alloca [16 x i16], align 2 // CHECK-GLOBAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[V:%.*]], align 8, !tbaa [[TBAA25]] // CHECK-GLOBAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) -// CHECK-GLOBAL-NEXT: [[CMP_I_I:%.*]] = icmp ne ptr addrspace(1) [[P:%.*]], null -// CHECK-GLOBAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I]]) +// CHECK-GLOBAL-NEXT: [[CMP_I_I_I:%.*]] = icmp ne ptr addrspace(1) [[P:%.*]], null +// CHECK-GLOBAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I_I]]) // CHECK-GLOBAL-NEXT: [[TMP2:%.*]] = ptrtoint ptr addrspace(1) [[P]] to i64 -// CHECK-GLOBAL-NEXT: [[REM_I_I:%.*]] = and i64 [[TMP2]], 15 -// CHECK-GLOBAL-NEXT: [[CMP1_I_NOT_I:%.*]] = icmp eq i64 [[REM_I_I]], 0 -// CHECK-GLOBAL-NEXT: br i1 [[CMP1_I_NOT_I]], label [[IF_END_I:%.*]], label [[IF_THEN_I:%.*]] -// CHECK-GLOBAL: if.then.i: +// CHECK-GLOBAL-NEXT: [[REM_I_I_I:%.*]] = and i64 [[TMP2]], 15 +// CHECK-GLOBAL-NEXT: [[CMP1_I_NOT_I_I:%.*]] = icmp eq i64 [[REM_I_I_I]], 0 +// CHECK-GLOBAL-NEXT: br i1 [[CMP1_I_NOT_I_I]], label [[IF_END_I_I:%.*]], label [[IF_THEN_I_I:%.*]] +// CHECK-GLOBAL: if.then.i.i: // CHECK-GLOBAL-NEXT: tail call 
spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] -// CHECK-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META121:![0-9]+]] -// CHECK-GLOBAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META124:![0-9]+]] +// CHECK-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META118:![0-9]+]] +// CHECK-GLOBAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META121:![0-9]+]] +// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] +// CHECK-GLOBAL: for.cond.i.i.i: +// CHECK-GLOBAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[IF_THEN_I_I]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] +// CHECK-GLOBAL-NEXT: [[CMP_I20_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I_I]], 16 +// CHECK-GLOBAL-NEXT: br i1 [[CMP_I20_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL6DETAIL16GROUP_STORE_IMPLINS0_9SUB_GROUPESLM16EPU3AS1SNS3_10PROPERTIESINS4_20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS4_9NAIVE_KEYEJEEENSB_INS4_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEEST_NS0_4SPANISR_XT1_EEESS_SU__EXIT_I_I:%.*]] +// CHECK-GLOBAL: for.body.i.i.i: +// CHECK-GLOBAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 +// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i16, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] +// CHECK-GLOBAL-NEXT: [[TMP5:%.*]] = load i16, ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 2, !tbaa [[TBAA30]] +// 
CHECK-GLOBAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul i32 [[TMP4]], [[I_0_I_I_I]] +// CHECK-GLOBAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[TMP3]], [[MUL_I_I_I_I]] +// CHECK-GLOBAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 +// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[P]], i64 [[IDXPROM_I_I_I]] +// CHECK-GLOBAL-NEXT: store i16 [[TMP5]], ptr addrspace(1) [[ARRAYIDX_I_I_I]], align 2, !tbaa [[TBAA30]] +// CHECK-GLOBAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 +// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP124:![0-9]+]] +// CHECK-GLOBAL: _ZN4sycl3_V13ext6oneapi12experimental6detail16group_store_implINS0_9sub_groupEsLm16EPU3AS1sNS3_10propertiesINS4_20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS4_9naive_keyEJEEENSB_INS4_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeEST_NS0_4spanISR_XT1_EEESS_SU_.exit.i.i: +// CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] +// CHECK-GLOBAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPESLM16EPU3AS1SNS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_SS__EXIT:%.*]] +// CHECK-GLOBAL: if.end.i.i: +// CHECK-GLOBAL-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr nonnull [[VALUES_I_I]]) #[[ATTR9]] // CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I:%.*]] // 
CHECK-GLOBAL: for.cond.i.i: -// CHECK-GLOBAL-NEXT: [[I_0_I_I:%.*]] = phi i32 [ 0, [[IF_THEN_I]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] -// CHECK-GLOBAL-NEXT: [[CMP_I19_I:%.*]] = icmp samesign ult i32 [[I_0_I_I]], 16 -// CHECK-GLOBAL-NEXT: br i1 [[CMP_I19_I]], label [[FOR_BODY_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPESLM16EPU3AS1SNS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_9NAIVE_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEEST_NS0_4SPANISR_XT1_EEESS_SU__EXIT_I:%.*]] +// CHECK-GLOBAL-NEXT: [[I_0_I_I:%.*]] = phi i32 [ 0, [[IF_END_I_I]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] +// CHECK-GLOBAL-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I]], 16 +// CHECK-GLOBAL-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[FOR_COND_CLEANUP_I_I:%.*]] +// CHECK-GLOBAL: for.cond.cleanup.i.i: +// CHECK-GLOBAL-NEXT: [[TMP6:%.*]] = load <16 x i16>, ptr [[VALUES_I_I]], align 2, !tbaa [[TBAA34]] +// CHECK-GLOBAL-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELIDv16_tEvPU3AS1tT_(ptr addrspace(1) noundef nonnull [[P]], <16 x i16> noundef [[TMP6]]) #[[ATTR7]] +// CHECK-GLOBAL-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[VALUES_I_I]]) #[[ATTR9]] +// CHECK-GLOBAL-NEXT: br label 
[[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPESLM16EPU3AS1SNS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_SS__EXIT]] // CHECK-GLOBAL: for.body.i.i: // CHECK-GLOBAL-NEXT: [[CONV_I_I:%.*]] = zext nneg i32 [[I_0_I_I]] to i64 -// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds nuw i16, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I]] -// CHECK-GLOBAL-NEXT: [[TMP5:%.*]] = load i16, ptr addrspace(4) [[ARRAYIDX_I_I_I]], align 2, !tbaa [[TBAA30]] -// CHECK-GLOBAL-NEXT: [[MUL_I_I_I:%.*]] = mul i32 [[TMP4]], [[I_0_I_I]] -// CHECK-GLOBAL-NEXT: [[ADD_I_I_I:%.*]] = add i32 [[TMP3]], [[MUL_I_I_I]] -// CHECK-GLOBAL-NEXT: [[IDXPROM_I_I:%.*]] = sext i32 [[ADD_I_I_I]] to i64 -// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[P]], i64 [[IDXPROM_I_I]] -// CHECK-GLOBAL-NEXT: store i16 [[TMP5]], ptr addrspace(1) [[ARRAYIDX_I_I]], align 2, !tbaa [[TBAA30]] +// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I21_I_I:%.*]] = getelementptr inbounds nuw i16, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I]] +// CHECK-GLOBAL-NEXT: [[TMP7:%.*]] = load i16, ptr addrspace(4) [[ARRAYIDX_I21_I_I]], align 2, !tbaa [[TBAA30]] +// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds [16 x i16], ptr [[VALUES_I_I]], i64 0, i64 [[CONV_I_I]] +// CHECK-GLOBAL-NEXT: store i16 [[TMP7]], ptr [[ARRAYIDX_I_I]], align 2, !tbaa [[TBAA30]] // CHECK-GLOBAL-NEXT: [[INC_I_I]] = add nuw nsw i32 [[I_0_I_I]], 1 -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP127:![0-9]+]] -// CHECK-GLOBAL: 
_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEsLm16EPU3AS1sNS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_9naive_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeEST_NS0_4spanISR_XT1_EEESS_SU_.exit.i: -// CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] -// CHECK-GLOBAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPESLM16EPU3AS1SNS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_SS__EXIT:%.*]] -// CHECK-GLOBAL: if.end.i: -// CHECK-GLOBAL-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr nonnull [[VALUES_I]]) #[[ATTR9]] -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I:%.*]] -// CHECK-GLOBAL: for.cond.i: -// CHECK-GLOBAL-NEXT: [[I_0_I:%.*]] = phi i32 [ 0, [[IF_END_I]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] -// CHECK-GLOBAL-NEXT: [[CMP_I:%.*]] = icmp samesign ult i32 [[I_0_I]], 16 -// CHECK-GLOBAL-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[FOR_COND_CLEANUP_I:%.*]] -// CHECK-GLOBAL: for.cond.cleanup.i: -// CHECK-GLOBAL-NEXT: [[TMP6:%.*]] = load <16 x i16>, ptr [[VALUES_I]], align 2, !tbaa [[TBAA34]] -// CHECK-GLOBAL-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELIDv16_tEvPU3AS1tT_(ptr addrspace(1) noundef nonnull [[P]], <16 x i16> noundef [[TMP6]]) #[[ATTR7]] -// 
CHECK-GLOBAL-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[VALUES_I]]) #[[ATTR9]] -// CHECK-GLOBAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPESLM16EPU3AS1SNS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_SS__EXIT]] -// CHECK-GLOBAL: for.body.i: -// CHECK-GLOBAL-NEXT: [[CONV_I:%.*]] = zext nneg i32 [[I_0_I]] to i64 -// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I20_I:%.*]] = getelementptr inbounds nuw i16, ptr addrspace(4) [[TMP1]], i64 [[CONV_I]] -// CHECK-GLOBAL-NEXT: [[TMP7:%.*]] = load i16, ptr addrspace(4) [[ARRAYIDX_I20_I]], align 2, !tbaa [[TBAA30]] -// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [16 x i16], ptr [[VALUES_I]], i64 0, i64 [[CONV_I]] -// CHECK-GLOBAL-NEXT: store i16 [[TMP7]], ptr [[ARRAYIDX_I]], align 2, !tbaa [[TBAA30]] -// CHECK-GLOBAL-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1 -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP128:![0-9]+]] +// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP125:![0-9]+]] // CHECK-GLOBAL: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEsLm16EPU3AS1sNS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_NS0_4spanISP_XT1_EEESQ_SS_.exit: // CHECK-GLOBAL-NEXT: ret void // // CHECK-LOCAL-LABEL: 
@_ZN7striped19test_sixteen_shortsERN4sycl3_V19sub_groupENS1_4spanIsLm16EEEPU3AS3s( // CHECK-LOCAL-NEXT: entry: -// CHECK-LOCAL-NEXT: [[VALUES_I:%.*]] = alloca [16 x i16], align 2 +// CHECK-LOCAL-NEXT: [[VALUES_I_I:%.*]] = alloca [16 x i16], align 2 // CHECK-LOCAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[V:%.*]], align 8, !tbaa [[TBAA25]] // CHECK-LOCAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) -// CHECK-LOCAL-NEXT: [[CMP_I_I:%.*]] = icmp ne ptr addrspace(3) [[P:%.*]], null -// CHECK-LOCAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I]]) +// CHECK-LOCAL-NEXT: [[CMP_I_I_I:%.*]] = icmp ne ptr addrspace(3) [[P:%.*]], null +// CHECK-LOCAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I_I]]) // CHECK-LOCAL-NEXT: [[TMP2:%.*]] = ptrtoint ptr addrspace(3) [[P]] to i64 -// CHECK-LOCAL-NEXT: [[REM_I_I:%.*]] = and i64 [[TMP2]], 15 -// CHECK-LOCAL-NEXT: [[CMP1_I_NOT_I:%.*]] = icmp eq i64 [[REM_I_I]], 0 -// CHECK-LOCAL-NEXT: br i1 [[CMP1_I_NOT_I]], label [[IF_END_I:%.*]], label [[IF_THEN_I:%.*]] -// CHECK-LOCAL: if.then.i: +// CHECK-LOCAL-NEXT: [[REM_I_I_I:%.*]] = and i64 [[TMP2]], 15 +// CHECK-LOCAL-NEXT: [[CMP1_I_NOT_I_I:%.*]] = icmp eq i64 [[REM_I_I_I]], 0 +// CHECK-LOCAL-NEXT: br i1 [[CMP1_I_NOT_I_I]], label [[IF_END_I_I:%.*]], label [[IF_THEN_I_I:%.*]] +// CHECK-LOCAL: if.then.i.i: // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] -// CHECK-LOCAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META121:![0-9]+]] -// CHECK-LOCAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META124:![0-9]+]] +// CHECK-LOCAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META118:![0-9]+]] +// CHECK-LOCAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) 
@__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META121:![0-9]+]] +// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] +// CHECK-LOCAL: for.cond.i.i.i: +// CHECK-LOCAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[IF_THEN_I_I]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] +// CHECK-LOCAL-NEXT: [[CMP_I20_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I_I]], 16 +// CHECK-LOCAL-NEXT: br i1 [[CMP_I20_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL6DETAIL16GROUP_STORE_IMPLINS0_9SUB_GROUPESLM16EPU3AS3SNS3_10PROPERTIESINS4_20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS4_9NAIVE_KEYEJEEENSB_INS4_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEEST_NS0_4SPANISR_XT1_EEESS_SU__EXIT_I_I:%.*]] +// CHECK-LOCAL: for.body.i.i.i: +// CHECK-LOCAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 +// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i16, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] +// CHECK-LOCAL-NEXT: [[TMP5:%.*]] = load i16, ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 2, !tbaa [[TBAA30]] +// CHECK-LOCAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul i32 [[TMP4]], [[I_0_I_I_I]] +// CHECK-LOCAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[TMP3]], [[MUL_I_I_I_I]] +// CHECK-LOCAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 +// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i16, ptr addrspace(3) [[P]], i64 [[IDXPROM_I_I_I]] +// CHECK-LOCAL-NEXT: store i16 [[TMP5]], ptr addrspace(3) [[ARRAYIDX_I_I_I]], align 2, !tbaa [[TBAA30]] +// CHECK-LOCAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 +// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP124:![0-9]+]] +// CHECK-LOCAL: 
_ZN4sycl3_V13ext6oneapi12experimental6detail16group_store_implINS0_9sub_groupEsLm16EPU3AS3sNS3_10propertiesINS4_20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS4_9naive_keyEJEEENSB_INS4_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeEST_NS0_4spanISR_XT1_EEESS_SU_.exit.i.i: +// CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] +// CHECK-LOCAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPESLM16EPU3AS3SNS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_SS__EXIT:%.*]] +// CHECK-LOCAL: if.end.i.i: +// CHECK-LOCAL-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr nonnull [[VALUES_I_I]]) #[[ATTR9]] // CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I:%.*]] // CHECK-LOCAL: for.cond.i.i: -// CHECK-LOCAL-NEXT: [[I_0_I_I:%.*]] = phi i32 [ 0, [[IF_THEN_I]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] -// CHECK-LOCAL-NEXT: [[CMP_I19_I:%.*]] = icmp samesign ult i32 [[I_0_I_I]], 16 -// CHECK-LOCAL-NEXT: br i1 [[CMP_I19_I]], label [[FOR_BODY_I_I]], label 
[[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPESLM16EPU3AS3SNS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_9NAIVE_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEEST_NS0_4SPANISR_XT1_EEESS_SU__EXIT_I:%.*]] +// CHECK-LOCAL-NEXT: [[I_0_I_I:%.*]] = phi i32 [ 0, [[IF_END_I_I]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] +// CHECK-LOCAL-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I]], 16 +// CHECK-LOCAL-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[FOR_COND_CLEANUP_I_I:%.*]] +// CHECK-LOCAL: for.cond.cleanup.i.i: +// CHECK-LOCAL-NEXT: [[TMP6:%.*]] = load <16 x i16>, ptr [[VALUES_I_I]], align 2, !tbaa [[TBAA34]] +// CHECK-LOCAL-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELIDv16_tEvPU3AS3tT_(ptr addrspace(3) noundef nonnull [[P]], <16 x i16> noundef [[TMP6]]) #[[ATTR7]] +// CHECK-LOCAL-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[VALUES_I_I]]) #[[ATTR9]] +// CHECK-LOCAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPESLM16EPU3AS3SNS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_SS__EXIT]] // CHECK-LOCAL: for.body.i.i: // CHECK-LOCAL-NEXT: [[CONV_I_I:%.*]] = zext nneg i32 [[I_0_I_I]] to i64 -// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds nuw i16, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I]] -// CHECK-LOCAL-NEXT: 
[[TMP5:%.*]] = load i16, ptr addrspace(4) [[ARRAYIDX_I_I_I]], align 2, !tbaa [[TBAA30]] -// CHECK-LOCAL-NEXT: [[MUL_I_I_I:%.*]] = mul i32 [[TMP4]], [[I_0_I_I]] -// CHECK-LOCAL-NEXT: [[ADD_I_I_I:%.*]] = add i32 [[TMP3]], [[MUL_I_I_I]] -// CHECK-LOCAL-NEXT: [[IDXPROM_I_I:%.*]] = sext i32 [[ADD_I_I_I]] to i64 -// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i16, ptr addrspace(3) [[P]], i64 [[IDXPROM_I_I]] -// CHECK-LOCAL-NEXT: store i16 [[TMP5]], ptr addrspace(3) [[ARRAYIDX_I_I]], align 2, !tbaa [[TBAA30]] +// CHECK-LOCAL-NEXT: [[ARRAYIDX_I21_I_I:%.*]] = getelementptr inbounds nuw i16, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I]] +// CHECK-LOCAL-NEXT: [[TMP7:%.*]] = load i16, ptr addrspace(4) [[ARRAYIDX_I21_I_I]], align 2, !tbaa [[TBAA30]] +// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds [16 x i16], ptr [[VALUES_I_I]], i64 0, i64 [[CONV_I_I]] +// CHECK-LOCAL-NEXT: store i16 [[TMP7]], ptr [[ARRAYIDX_I_I]], align 2, !tbaa [[TBAA30]] // CHECK-LOCAL-NEXT: [[INC_I_I]] = add nuw nsw i32 [[I_0_I_I]], 1 -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP127:![0-9]+]] -// CHECK-LOCAL: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEsLm16EPU3AS3sNS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_9naive_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeEST_NS0_4spanISR_XT1_EEESS_SU_.exit.i: -// CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] -// CHECK-LOCAL-NEXT: br label 
[[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPESLM16EPU3AS3SNS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_SS__EXIT:%.*]] -// CHECK-LOCAL: if.end.i: -// CHECK-LOCAL-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr nonnull [[VALUES_I]]) #[[ATTR9]] -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I:%.*]] -// CHECK-LOCAL: for.cond.i: -// CHECK-LOCAL-NEXT: [[I_0_I:%.*]] = phi i32 [ 0, [[IF_END_I]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] -// CHECK-LOCAL-NEXT: [[CMP_I:%.*]] = icmp samesign ult i32 [[I_0_I]], 16 -// CHECK-LOCAL-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[FOR_COND_CLEANUP_I:%.*]] -// CHECK-LOCAL: for.cond.cleanup.i: -// CHECK-LOCAL-NEXT: [[TMP6:%.*]] = load <16 x i16>, ptr [[VALUES_I]], align 2, !tbaa [[TBAA34]] -// CHECK-LOCAL-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELIDv16_tEvPU3AS3tT_(ptr addrspace(3) noundef nonnull [[P]], <16 x i16> noundef [[TMP6]]) #[[ATTR7]] -// CHECK-LOCAL-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[VALUES_I]]) #[[ATTR9]] -// CHECK-LOCAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPESLM16EPU3AS3SNS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_SS__EXIT]] -// CHECK-LOCAL: for.body.i: -// CHECK-LOCAL-NEXT: [[CONV_I:%.*]] = zext nneg i32 [[I_0_I]] 
to i64 -// CHECK-LOCAL-NEXT: [[ARRAYIDX_I20_I:%.*]] = getelementptr inbounds nuw i16, ptr addrspace(4) [[TMP1]], i64 [[CONV_I]] -// CHECK-LOCAL-NEXT: [[TMP7:%.*]] = load i16, ptr addrspace(4) [[ARRAYIDX_I20_I]], align 2, !tbaa [[TBAA30]] -// CHECK-LOCAL-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [16 x i16], ptr [[VALUES_I]], i64 0, i64 [[CONV_I]] -// CHECK-LOCAL-NEXT: store i16 [[TMP7]], ptr [[ARRAYIDX_I]], align 2, !tbaa [[TBAA30]] -// CHECK-LOCAL-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1 -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP128:![0-9]+]] +// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP125:![0-9]+]] // CHECK-LOCAL: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEsLm16EPU3AS3sNS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_NS0_4spanISP_XT1_EEESQ_SS_.exit: // CHECK-LOCAL-NEXT: ret void // @@ -1328,24 +1344,24 @@ SYCL_EXTERNAL void test_sixteen_shorts(sycl::sub_group &sg, span v, // CHECK-GLOBAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[V:%.*]], align 8, !tbaa [[TBAA15]] // CHECK-GLOBAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] -// CHECK-GLOBAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META130:![0-9]+]] -// CHECK-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META133:![0-9]+]] -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I:%.*]] -// CHECK-GLOBAL: for.cond.i.i: -// 
CHECK-GLOBAL-NEXT: [[I_0_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] -// CHECK-GLOBAL-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I]], 3 -// CHECK-GLOBAL-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM3EPU3AS1INS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_SS__EXIT:%.*]] -// CHECK-GLOBAL: for.body.i.i: -// CHECK-GLOBAL-NEXT: [[CONV_I_I:%.*]] = zext nneg i32 [[I_0_I_I]] to i64 -// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I]] -// CHECK-GLOBAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-GLOBAL-NEXT: [[MUL_I_I_I:%.*]] = mul i32 [[TMP3]], [[I_0_I_I]] -// CHECK-GLOBAL-NEXT: [[ADD_I_I_I:%.*]] = add i32 [[TMP2]], [[MUL_I_I_I]] -// CHECK-GLOBAL-NEXT: [[IDXPROM_I_I:%.*]] = sext i32 [[ADD_I_I_I]] to i64 -// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[P:%.*]], i64 [[IDXPROM_I_I]] -// CHECK-GLOBAL-NEXT: store i32 [[TMP4]], ptr addrspace(1) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-GLOBAL-NEXT: [[INC_I_I]] = add nuw nsw i32 [[I_0_I_I]], 1 -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP136:![0-9]+]] +// CHECK-GLOBAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META127:![0-9]+]] +// CHECK-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias 
[[META130:![0-9]+]] +// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] +// CHECK-GLOBAL: for.cond.i.i.i: +// CHECK-GLOBAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] +// CHECK-GLOBAL-NEXT: [[CMP_I_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I_I]], 3 +// CHECK-GLOBAL-NEXT: br i1 [[CMP_I_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM3EPU3AS1INS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_SS__EXIT:%.*]] +// CHECK-GLOBAL: for.body.i.i.i: +// CHECK-GLOBAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 +// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] +// CHECK-GLOBAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-GLOBAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul i32 [[TMP3]], [[I_0_I_I_I]] +// CHECK-GLOBAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[TMP2]], [[MUL_I_I_I_I]] +// CHECK-GLOBAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 +// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[P:%.*]], i64 [[IDXPROM_I_I_I]] +// CHECK-GLOBAL-NEXT: store i32 [[TMP4]], ptr addrspace(1) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-GLOBAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 +// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP133:![0-9]+]] // CHECK-GLOBAL: 
_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm3EPU3AS1iNS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_NS0_4spanISP_XT1_EEESQ_SS_.exit: // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // CHECK-GLOBAL-NEXT: ret void @@ -1355,24 +1371,24 @@ SYCL_EXTERNAL void test_sixteen_shorts(sycl::sub_group &sg, span v, // CHECK-LOCAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[V:%.*]], align 8, !tbaa [[TBAA15]] // CHECK-LOCAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] -// CHECK-LOCAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META130:![0-9]+]] -// CHECK-LOCAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META133:![0-9]+]] -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I:%.*]] -// CHECK-LOCAL: for.cond.i.i: -// CHECK-LOCAL-NEXT: [[I_0_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] -// CHECK-LOCAL-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I]], 3 -// CHECK-LOCAL-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label 
[[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM3EPU3AS3INS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_SS__EXIT:%.*]] -// CHECK-LOCAL: for.body.i.i: -// CHECK-LOCAL-NEXT: [[CONV_I_I:%.*]] = zext nneg i32 [[I_0_I_I]] to i64 -// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I]] -// CHECK-LOCAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-LOCAL-NEXT: [[MUL_I_I_I:%.*]] = mul i32 [[TMP3]], [[I_0_I_I]] -// CHECK-LOCAL-NEXT: [[ADD_I_I_I:%.*]] = add i32 [[TMP2]], [[MUL_I_I_I]] -// CHECK-LOCAL-NEXT: [[IDXPROM_I_I:%.*]] = sext i32 [[ADD_I_I_I]] to i64 -// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[P:%.*]], i64 [[IDXPROM_I_I]] -// CHECK-LOCAL-NEXT: store i32 [[TMP4]], ptr addrspace(3) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-LOCAL-NEXT: [[INC_I_I]] = add nuw nsw i32 [[I_0_I_I]], 1 -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP136:![0-9]+]] +// CHECK-LOCAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META127:![0-9]+]] +// CHECK-LOCAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META130:![0-9]+]] +// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] +// CHECK-LOCAL: for.cond.i.i.i: +// CHECK-LOCAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] +// CHECK-LOCAL-NEXT: [[CMP_I_I_I:%.*]] = icmp samesign ult i32 
[[I_0_I_I_I]], 3 +// CHECK-LOCAL-NEXT: br i1 [[CMP_I_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM3EPU3AS3INS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_SS__EXIT:%.*]] +// CHECK-LOCAL: for.body.i.i.i: +// CHECK-LOCAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 +// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] +// CHECK-LOCAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-LOCAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul i32 [[TMP3]], [[I_0_I_I_I]] +// CHECK-LOCAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[TMP2]], [[MUL_I_I_I_I]] +// CHECK-LOCAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 +// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[P:%.*]], i64 [[IDXPROM_I_I_I]] +// CHECK-LOCAL-NEXT: store i32 [[TMP4]], ptr addrspace(3) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-LOCAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 +// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP133:![0-9]+]] // CHECK-LOCAL: 
_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm3EPU3AS3iNS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_NS0_4spanISP_XT1_EEESQ_SS_.exit: // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // CHECK-LOCAL-NEXT: ret void @@ -1388,24 +1404,24 @@ SYCL_EXTERNAL void test_non_power_of_two(sycl::sub_group &sg, span v, // CHECK-GLOBAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[V:%.*]], align 8, !tbaa [[TBAA15]] // CHECK-GLOBAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] -// CHECK-GLOBAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META138:![0-9]+]] -// CHECK-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META141:![0-9]+]] -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I:%.*]] -// CHECK-GLOBAL: for.cond.i.i: -// CHECK-GLOBAL-NEXT: [[I_0_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] -// CHECK-GLOBAL-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I]], 16 -// CHECK-GLOBAL-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label 
[[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM16EPU3AS1INS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_SS__EXIT:%.*]] -// CHECK-GLOBAL: for.body.i.i: -// CHECK-GLOBAL-NEXT: [[CONV_I_I:%.*]] = zext nneg i32 [[I_0_I_I]] to i64 -// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I]] -// CHECK-GLOBAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-GLOBAL-NEXT: [[MUL_I_I_I:%.*]] = mul i32 [[TMP3]], [[I_0_I_I]] -// CHECK-GLOBAL-NEXT: [[ADD_I_I_I:%.*]] = add i32 [[TMP2]], [[MUL_I_I_I]] -// CHECK-GLOBAL-NEXT: [[IDXPROM_I_I:%.*]] = sext i32 [[ADD_I_I_I]] to i64 -// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[P:%.*]], i64 [[IDXPROM_I_I]] -// CHECK-GLOBAL-NEXT: store i32 [[TMP4]], ptr addrspace(1) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-GLOBAL-NEXT: [[INC_I_I]] = add nuw nsw i32 [[I_0_I_I]], 1 -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP144:![0-9]+]] +// CHECK-GLOBAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META135:![0-9]+]] +// CHECK-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META138:![0-9]+]] +// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] +// CHECK-GLOBAL: for.cond.i.i.i: +// CHECK-GLOBAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] +// CHECK-GLOBAL-NEXT: [[CMP_I_I_I:%.*]] = icmp 
samesign ult i32 [[I_0_I_I_I]], 16 +// CHECK-GLOBAL-NEXT: br i1 [[CMP_I_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM16EPU3AS1INS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_SS__EXIT:%.*]] +// CHECK-GLOBAL: for.body.i.i.i: +// CHECK-GLOBAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 +// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] +// CHECK-GLOBAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-GLOBAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul i32 [[TMP3]], [[I_0_I_I_I]] +// CHECK-GLOBAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[TMP2]], [[MUL_I_I_I_I]] +// CHECK-GLOBAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 +// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[P:%.*]], i64 [[IDXPROM_I_I_I]] +// CHECK-GLOBAL-NEXT: store i32 [[TMP4]], ptr addrspace(1) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-GLOBAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 +// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP141:![0-9]+]] // CHECK-GLOBAL: 
_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm16EPU3AS1iNS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_NS0_4spanISP_XT1_EEESQ_SS_.exit: // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // CHECK-GLOBAL-NEXT: ret void @@ -1415,24 +1431,24 @@ SYCL_EXTERNAL void test_non_power_of_two(sycl::sub_group &sg, span v, // CHECK-LOCAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[V:%.*]], align 8, !tbaa [[TBAA15]] // CHECK-LOCAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] -// CHECK-LOCAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META138:![0-9]+]] -// CHECK-LOCAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META141:![0-9]+]] -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I:%.*]] -// CHECK-LOCAL: for.cond.i.i: -// CHECK-LOCAL-NEXT: [[I_0_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] -// CHECK-LOCAL-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I]], 16 -// CHECK-LOCAL-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label 
[[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM16EPU3AS3INS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_SS__EXIT:%.*]] -// CHECK-LOCAL: for.body.i.i: -// CHECK-LOCAL-NEXT: [[CONV_I_I:%.*]] = zext nneg i32 [[I_0_I_I]] to i64 -// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I]] -// CHECK-LOCAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-LOCAL-NEXT: [[MUL_I_I_I:%.*]] = mul i32 [[TMP3]], [[I_0_I_I]] -// CHECK-LOCAL-NEXT: [[ADD_I_I_I:%.*]] = add i32 [[TMP2]], [[MUL_I_I_I]] -// CHECK-LOCAL-NEXT: [[IDXPROM_I_I:%.*]] = sext i32 [[ADD_I_I_I]] to i64 -// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[P:%.*]], i64 [[IDXPROM_I_I]] -// CHECK-LOCAL-NEXT: store i32 [[TMP4]], ptr addrspace(3) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-LOCAL-NEXT: [[INC_I_I]] = add nuw nsw i32 [[I_0_I_I]], 1 -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP144:![0-9]+]] +// CHECK-LOCAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META135:![0-9]+]] +// CHECK-LOCAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META138:![0-9]+]] +// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] +// CHECK-LOCAL: for.cond.i.i.i: +// CHECK-LOCAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] +// CHECK-LOCAL-NEXT: [[CMP_I_I_I:%.*]] = icmp samesign ult i32 
[[I_0_I_I_I]], 16 +// CHECK-LOCAL-NEXT: br i1 [[CMP_I_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM16EPU3AS3INS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_SS__EXIT:%.*]] +// CHECK-LOCAL: for.body.i.i.i: +// CHECK-LOCAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 +// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] +// CHECK-LOCAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-LOCAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul i32 [[TMP3]], [[I_0_I_I_I]] +// CHECK-LOCAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[TMP2]], [[MUL_I_I_I_I]] +// CHECK-LOCAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 +// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[P:%.*]], i64 [[IDXPROM_I_I_I]] +// CHECK-LOCAL-NEXT: store i32 [[TMP4]], ptr addrspace(3) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-LOCAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 +// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP141:![0-9]+]] // CHECK-LOCAL: 
_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm16EPU3AS3iNS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_NS0_4spanISP_XT1_EEESQ_SS_.exit: // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // CHECK-LOCAL-NEXT: ret void @@ -1449,24 +1465,24 @@ SYCL_EXTERNAL void test_sixteen_ints(sycl::sub_group &sg, span v, // CHECK-GLOBAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[V:%.*]], align 8, !tbaa [[TBAA15]] // CHECK-GLOBAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] -// CHECK-GLOBAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META146:![0-9]+]] -// CHECK-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META149:![0-9]+]] -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I:%.*]] -// CHECK-GLOBAL: for.cond.i.i: -// CHECK-GLOBAL-NEXT: [[I_0_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] -// CHECK-GLOBAL-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I]], 11 -// CHECK-GLOBAL-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label 
[[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM11EPU3AS1INS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_SS__EXIT:%.*]] -// CHECK-GLOBAL: for.body.i.i: -// CHECK-GLOBAL-NEXT: [[CONV_I_I:%.*]] = zext nneg i32 [[I_0_I_I]] to i64 -// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I]] -// CHECK-GLOBAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-GLOBAL-NEXT: [[MUL_I_I_I:%.*]] = mul i32 [[TMP3]], [[I_0_I_I]] -// CHECK-GLOBAL-NEXT: [[ADD_I_I_I:%.*]] = add i32 [[TMP2]], [[MUL_I_I_I]] -// CHECK-GLOBAL-NEXT: [[IDXPROM_I_I:%.*]] = sext i32 [[ADD_I_I_I]] to i64 -// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[P:%.*]], i64 [[IDXPROM_I_I]] -// CHECK-GLOBAL-NEXT: store i32 [[TMP4]], ptr addrspace(1) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-GLOBAL-NEXT: [[INC_I_I]] = add nuw nsw i32 [[I_0_I_I]], 1 -// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP152:![0-9]+]] +// CHECK-GLOBAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META143:![0-9]+]] +// CHECK-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META146:![0-9]+]] +// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] +// CHECK-GLOBAL: for.cond.i.i.i: +// CHECK-GLOBAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] +// CHECK-GLOBAL-NEXT: [[CMP_I_I_I:%.*]] = icmp 
samesign ult i32 [[I_0_I_I_I]], 11 +// CHECK-GLOBAL-NEXT: br i1 [[CMP_I_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM11EPU3AS1INS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_SS__EXIT:%.*]] +// CHECK-GLOBAL: for.body.i.i.i: +// CHECK-GLOBAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 +// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] +// CHECK-GLOBAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-GLOBAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul i32 [[TMP3]], [[I_0_I_I_I]] +// CHECK-GLOBAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[TMP2]], [[MUL_I_I_I_I]] +// CHECK-GLOBAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 +// CHECK-GLOBAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[P:%.*]], i64 [[IDXPROM_I_I_I]] +// CHECK-GLOBAL-NEXT: store i32 [[TMP4]], ptr addrspace(1) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-GLOBAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 +// CHECK-GLOBAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP149:![0-9]+]] // CHECK-GLOBAL: 
_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm11EPU3AS1iNS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_NS0_4spanISP_XT1_EEESQ_SS_.exit: // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // CHECK-GLOBAL-NEXT: ret void @@ -1476,24 +1492,24 @@ SYCL_EXTERNAL void test_sixteen_ints(sycl::sub_group &sg, span v, // CHECK-LOCAL-NEXT: [[TMP0:%.*]] = load i64, ptr [[V:%.*]], align 8, !tbaa [[TBAA15]] // CHECK-LOCAL-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] -// CHECK-LOCAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META146:![0-9]+]] -// CHECK-LOCAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META149:![0-9]+]] -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I:%.*]] -// CHECK-LOCAL: for.cond.i.i: -// CHECK-LOCAL-NEXT: [[I_0_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] -// CHECK-LOCAL-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I]], 11 -// CHECK-LOCAL-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label 
[[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM11EPU3AS3INS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_SS__EXIT:%.*]] -// CHECK-LOCAL: for.body.i.i: -// CHECK-LOCAL-NEXT: [[CONV_I_I:%.*]] = zext nneg i32 [[I_0_I_I]] to i64 -// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I]] -// CHECK-LOCAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-LOCAL-NEXT: [[MUL_I_I_I:%.*]] = mul i32 [[TMP3]], [[I_0_I_I]] -// CHECK-LOCAL-NEXT: [[ADD_I_I_I:%.*]] = add i32 [[TMP2]], [[MUL_I_I_I]] -// CHECK-LOCAL-NEXT: [[IDXPROM_I_I:%.*]] = sext i32 [[ADD_I_I_I]] to i64 -// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[P:%.*]], i64 [[IDXPROM_I_I]] -// CHECK-LOCAL-NEXT: store i32 [[TMP4]], ptr addrspace(3) [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA8]] -// CHECK-LOCAL-NEXT: [[INC_I_I]] = add nuw nsw i32 [[I_0_I_I]], 1 -// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP152:![0-9]+]] +// CHECK-LOCAL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META143:![0-9]+]] +// CHECK-LOCAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META146:![0-9]+]] +// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I:%.*]] +// CHECK-LOCAL: for.cond.i.i.i: +// CHECK-LOCAL-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] +// CHECK-LOCAL-NEXT: [[CMP_I_I_I:%.*]] = icmp samesign ult i32 
[[I_0_I_I_I]], 11 +// CHECK-LOCAL-NEXT: br i1 [[CMP_I_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM11EPU3AS3INS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_SS__EXIT:%.*]] +// CHECK-LOCAL: for.body.i.i.i: +// CHECK-LOCAL-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[I_0_I_I_I]] to i64 +// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I_I]] +// CHECK-LOCAL-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX_I_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-LOCAL-NEXT: [[MUL_I_I_I_I:%.*]] = mul i32 [[TMP3]], [[I_0_I_I_I]] +// CHECK-LOCAL-NEXT: [[ADD_I_I_I_I:%.*]] = add i32 [[TMP2]], [[MUL_I_I_I_I]] +// CHECK-LOCAL-NEXT: [[IDXPROM_I_I_I:%.*]] = sext i32 [[ADD_I_I_I_I]] to i64 +// CHECK-LOCAL-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[P:%.*]], i64 [[IDXPROM_I_I_I]] +// CHECK-LOCAL-NEXT: store i32 [[TMP4]], ptr addrspace(3) [[ARRAYIDX_I_I_I]], align 4, !tbaa [[TBAA8]] +// CHECK-LOCAL-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 +// CHECK-LOCAL-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP149:![0-9]+]] // CHECK-LOCAL: 
_ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiLm11EPU3AS3iNS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_NS0_4spanISP_XT1_EEESQ_SS_.exit: // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // CHECK-LOCAL-NEXT: ret void diff --git a/sycl/test/group_load_store_alignment_negative.cpp b/sycl/test/group_load_store_alignment_negative.cpp new file mode 100644 index 0000000000000..c3aac19c446bd --- /dev/null +++ b/sycl/test/group_load_store_alignment_negative.cpp @@ -0,0 +1,20 @@ +// RUN: %clangxx %s -fsycl-device-only -Xclang -verify %s -Xclang -verify-ignore-unexpected=note,warning + +#include + +using namespace sycl; +using namespace sycl::ext::oneapi::experimental; + +// expected-error@* {{group_load requires a pointer if alignment property is used}} +SYCL_EXTERNAL void test(sycl::sub_group &sg, + sycl::detail::accessor_iterator accessor_iter, + int &out) { + group_load(sg, accessor_iter, out, properties(alignment<16>)); +} + +// expected-error@* {{group_store requires a pointer if alignment property is used}} +SYCL_EXTERNAL void test(sycl::sub_group &sg, + sycl::detail::accessor_iterator accessor_iter, + int v) { + group_store(sg, v, accessor_iter, properties(alignment<16>)); +} From 823f387b75531505941d58c8216960cd478807d5 Mon Sep 17 00:00:00 2001 From: Nick Sarnie Date: Tue, 11 Feb 2025 00:43:37 +0900 Subject: [PATCH 05/13] [CI] Add Battlemage testing to postcommit (#16910) Add a Battlemage E2E L0 testing job to postcommit, and disable failing tests.
--------- Signed-off-by: Sarnie, Nick --- .github/workflows/sycl-post-commit.yml | 5 +++++ sycl/test-e2e/AddressSanitizer/lit.local.cfg | 3 +++ sycl/test-e2e/AtomicRef/atomic_memory_order_seq_cst.cpp | 2 ++ sycl/test-e2e/InlineAsm/letter_example.cpp | 2 ++ sycl/test-e2e/InlineAsm/malloc_shared_32.cpp | 2 ++ .../Matrix/joint_matrix_bf16_fill_k_cache_arg_dim.cpp | 2 ++ .../Matrix/joint_matrix_bf16_fill_k_cache_runtime_dim.cpp | 2 ++ sycl/test-e2e/Matrix/joint_matrix_out_bounds.cpp | 3 ++- .../bindless_images/examples/example_1_1D_read_write.cpp | 3 ++- 9 files changed, 22 insertions(+), 2 deletions(-) diff --git a/.github/workflows/sycl-post-commit.yml b/.github/workflows/sycl-post-commit.yml index 200a6a7e2129d..8ec35ab596538 100644 --- a/.github/workflows/sycl-post-commit.yml +++ b/.github/workflows/sycl-post-commit.yml @@ -52,6 +52,11 @@ jobs: runner: '["Linux", "arc"]' extra_lit_opts: --param matrix-xmx8=True reset_intel_gpu: true + - name: Intel Battlemage Graphics with Level Zero + runner: '["Linux", "bmg"]' + target_devices: level_zero:gpu + # The new Xe kernel driver used by BMG doesn't support resetting. 
+ reset_intel_gpu: false - name: AMD/HIP runner: '["Linux", "amdgpu"]' image_options: -u 1001 --device=/dev/dri --device=/dev/kfd diff --git a/sycl/test-e2e/AddressSanitizer/lit.local.cfg b/sycl/test-e2e/AddressSanitizer/lit.local.cfg index dd59d9aec6b5c..29bd23b43efe6 100644 --- a/sycl/test-e2e/AddressSanitizer/lit.local.cfg +++ b/sycl/test-e2e/AddressSanitizer/lit.local.cfg @@ -19,3 +19,6 @@ config.unsupported_features += ['gpu-intel-gen12'] # CMPLRLLVM-64052 config.unsupported_features += ['spirv-backend'] + +# https://github.com/intel/llvm/issues/16920 +config.unsupported_features += ['arch-intel_gpu_bmg_g21'] diff --git a/sycl/test-e2e/AtomicRef/atomic_memory_order_seq_cst.cpp b/sycl/test-e2e/AtomicRef/atomic_memory_order_seq_cst.cpp index c29f3a3005a0b..686fac9965b48 100644 --- a/sycl/test-e2e/AtomicRef/atomic_memory_order_seq_cst.cpp +++ b/sycl/test-e2e/AtomicRef/atomic_memory_order_seq_cst.cpp @@ -1,5 +1,7 @@ // RUN: %{build} -O3 -o %t.out %if any-device-is-cuda %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 %} // RUN: %{run} %t.out +// UNSUPPORTED: arch-intel_gpu_bmg_g21 +// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/16924 #include "atomic_memory_order.h" #include diff --git a/sycl/test-e2e/InlineAsm/letter_example.cpp b/sycl/test-e2e/InlineAsm/letter_example.cpp index 780e33d77e803..f6d3df75871c4 100644 --- a/sycl/test-e2e/InlineAsm/letter_example.cpp +++ b/sycl/test-e2e/InlineAsm/letter_example.cpp @@ -1,4 +1,6 @@ // REQUIRES: sg-16,aspect-usm_shared_allocations +// XFAIL: arch-intel_gpu_bmg_g21 +// XFAIL-TRACKER: https://github.com/intel/llvm/issues/16921 // RUN: %{build} -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/InlineAsm/malloc_shared_32.cpp b/sycl/test-e2e/InlineAsm/malloc_shared_32.cpp index db96f418927dd..56b1cc95749b9 100644 --- a/sycl/test-e2e/InlineAsm/malloc_shared_32.cpp +++ b/sycl/test-e2e/InlineAsm/malloc_shared_32.cpp @@ -1,4 +1,6 @@ // REQUIRES: sg-32,aspect-usm_shared_allocations +// 
XFAIL: arch-intel_gpu_bmg_g21 +// XFAIL-TRACKER: https://github.com/intel/llvm/issues/16921 // RUN: %{build} -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_arg_dim.cpp b/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_arg_dim.cpp index 7f44294e1123d..9eaa4f0044dad 100644 --- a/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_arg_dim.cpp +++ b/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_arg_dim.cpp @@ -20,6 +20,8 @@ // test. // XFAIL: gpu-intel-dg2 && run-mode // XFAIL-TRACKER: GSD-10510 +// XFAIL: arch-intel_gpu_bmg_g21 +// XFAIL-TRACKER: https://github.com/intel/llvm/issues/16922 #include "common.hpp" #include "joint_matrix_bf16_fill_k_cache_impl.hpp" diff --git a/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_runtime_dim.cpp b/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_runtime_dim.cpp index a290a8cb00a6f..cbcd97d5c6c9b 100644 --- a/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_runtime_dim.cpp +++ b/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_runtime_dim.cpp @@ -20,6 +20,8 @@ // test. 
// XFAIL: gpu-intel-dg2 && run-mode // XFAIL-TRACKER: GSD-10510 +// XFAIL: arch-intel_gpu_bmg_g21 +// XFAIL-TRACKER: https://github.com/intel/llvm/issues/16922 #include "common.hpp" #include "joint_matrix_bf16_fill_k_cache_impl.hpp" diff --git a/sycl/test-e2e/Matrix/joint_matrix_out_bounds.cpp b/sycl/test-e2e/Matrix/joint_matrix_out_bounds.cpp index 4ca96773f4d8d..cfb475617b099 100644 --- a/sycl/test-e2e/Matrix/joint_matrix_out_bounds.cpp +++ b/sycl/test-e2e/Matrix/joint_matrix_out_bounds.cpp @@ -10,7 +10,8 @@ // other triples // REQUIRES: aspect-ext_intel_matrix - +// XFAIL: arch-intel_gpu_bmg_g21 +// XFAIL-TRACKER: https://github.com/intel/llvm/issues/16922 // UNSUPPORTED: gpu-intel-dg2, cpu // UNSUPPORTED-INTENDED: Checked load/stores are not supported by DG2 and CPU HW diff --git a/sycl/test-e2e/bindless_images/examples/example_1_1D_read_write.cpp b/sycl/test-e2e/bindless_images/examples/example_1_1D_read_write.cpp index beef3c7fcf09c..fa797958c1aa6 100644 --- a/sycl/test-e2e/bindless_images/examples/example_1_1D_read_write.cpp +++ b/sycl/test-e2e/bindless_images/examples/example_1_1D_read_write.cpp @@ -1,5 +1,6 @@ // REQUIRES: aspect-ext_oneapi_bindless_images - +// UNSUPPORTED: arch-intel_gpu_bmg_g21 +// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/16923 // UNSUPPORTED: hip // UNSUPPORTED-INTENDED: Undetermined issue in 'create_image' in this test. From c7c2b939a0bff86480f764434a77da5ee2bd7537 Mon Sep 17 00:00:00 2001 From: Nikita Kornev Date: Mon, 10 Feb 2025 17:16:17 +0100 Subject: [PATCH 06/13] [CI] Install pigz in docker container (#16945) pigz is a parallel implementation of gzip. Coverity accepts only a small range of archive formats: gzip, zip, lzma, xz or bz2. So use pigz to speed up the compression of the build for Coverity. Also remove the duplicate zstd entry.
--- devops/scripts/install_build_tools.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/devops/scripts/install_build_tools.sh b/devops/scripts/install_build_tools.sh index a878f2807fd0e..6c47fce4bdd06 100755 --- a/devops/scripts/install_build_tools.sh +++ b/devops/scripts/install_build_tools.sh @@ -10,7 +10,6 @@ apt update && apt install -yqq \ python3-psutil \ python-is-python3 \ python3-pip \ - zstd \ ocl-icd-opencl-dev \ vim \ libffi-dev \ @@ -21,6 +20,7 @@ apt update && apt install -yqq \ zstd \ zip \ unzip \ + pigz \ jq \ curl \ libhwloc-dev \ From 549e165c300bbe9289f549f9352660f31efe2f4b Mon Sep 17 00:00:00 2001 From: David Garcia Orozco Date: Mon, 10 Feb 2025 09:17:11 -0700 Subject: [PATCH 07/13] [SYCL][E2E] Change method for setting a `RUN:` line to execute at run stage (#16907) Previously, to explicitly mark that a `RUN:` line should execute at the run stage, the `%if run-mode %{...%}` markup was used. However, this markup is misleading, given that the entire line is skipped in `build-only` mode, not just the part enclosed in the braces. This PR replaces this markup with `%{run-aux}` to avoid this issue.
--- sycl/test-e2e/Config/config.cpp | 10 +++++----- sycl/test-e2e/ESIMD/PerformanceTests/BitonicSortK.cpp | 4 ++-- .../test-e2e/ESIMD/PerformanceTests/BitonicSortKv2.cpp | 4 ++-- sycl/test-e2e/ESIMD/PerformanceTests/Stencil.cpp | 4 ++-- .../ESIMD/PerformanceTests/invoke_simd_smoke.cpp | 4 ++-- .../ESIMD/PerformanceTests/matrix_transpose.cpp | 4 ++-- sycl/test-e2e/ESIMD/PerformanceTests/stencil2.cpp | 4 ++-- sycl/test-e2e/ESIMD/preemption.cpp | 2 +- .../KernelAndProgram/trace_kernel_program_cache.cpp | 6 +++--- .../KernelCompiler/kernel_compiler_cache_eviction.cpp | 2 +- sycl/test-e2e/format.py | 2 +- sycl/test-e2e/lit.cfg.py | 3 +++ 12 files changed, 26 insertions(+), 23 deletions(-) diff --git a/sycl/test-e2e/Config/config.cpp b/sycl/test-e2e/Config/config.cpp index 5f82d3fa28111..d4741661e8e5a 100644 --- a/sycl/test-e2e/Config/config.cpp +++ b/sycl/test-e2e/Config/config.cpp @@ -8,13 +8,13 @@ // RUN: %{build} %debug_option %O0 -o %t.out // RUN: echo SYCL_PRINT_EXECUTION_GRAPH=always > %t.cfg // RUN: %{run-unfiltered-devices} env SYCL_CONFIG_FILE_NAME=%t.cfg %t.out -// RUN: %if run-mode %{cat *.dot > /dev/null%} -// RUN: %if run-mode %{rm *.dot%} +// RUN: %{run-aux} cat *.dot > /dev/null +// RUN: %{run-aux} rm *.dot // RUN: %{run-unfiltered-devices} env SYCL_PRINT_EXECUTION_GRAPH=always %t.out -// RUN: %if run-mode %{cat *.dot > /dev/null%} -// RUN: %if run-mode %{rm *.dot%} +// RUN: %{run-aux} cat *.dot > /dev/null +// RUN: %{run-aux} rm *.dot // RUN: %{run-unfiltered-devices} %t.out -// RUN: %if run-mode %{not cat *.dot > /dev/null%} +// RUN: %{run-aux} not cat *.dot > /dev/null #include diff --git a/sycl/test-e2e/ESIMD/PerformanceTests/BitonicSortK.cpp b/sycl/test-e2e/ESIMD/PerformanceTests/BitonicSortK.cpp index 91d3449d1e74f..4e6f7944ad88e 100644 --- a/sycl/test-e2e/ESIMD/PerformanceTests/BitonicSortK.cpp +++ b/sycl/test-e2e/ESIMD/PerformanceTests/BitonicSortK.cpp @@ -10,7 +10,7 @@ // RUN: mkdir -p %t.dir && %{build} -o %t.dir/exec.out // RUN: env 
IGC_DumpToCustomDir=%t.dir IGC_ShaderDumpEnable=1 %{run} %t.dir/exec.out -// RUN: %if run-mode %{python3 %S/instruction_count.py %t.dir 2914 ZTSZZN11BitonicSort5SolveEPjS0_jENKUlRN4sycl3_V17handlerEE0_clES4_E5Merge.asm%} -// RUN: %if run-mode %{echo "Baseline from driver version 1.3.30872"%} +// RUN: %{run-aux} python3 %S/instruction_count.py %t.dir 2914 ZTSZZN11BitonicSort5SolveEPjS0_jENKUlRN4sycl3_V17handlerEE0_clES4_E5Merge.asm +// RUN: %{run-aux} echo "Baseline from driver version 1.3.30872" #include "../BitonicSortK.cpp" diff --git a/sycl/test-e2e/ESIMD/PerformanceTests/BitonicSortKv2.cpp b/sycl/test-e2e/ESIMD/PerformanceTests/BitonicSortKv2.cpp index cbb609681a85a..8fb30097d50df 100644 --- a/sycl/test-e2e/ESIMD/PerformanceTests/BitonicSortKv2.cpp +++ b/sycl/test-e2e/ESIMD/PerformanceTests/BitonicSortKv2.cpp @@ -10,7 +10,7 @@ // RUN: mkdir -p %t.dir && %{build} -o %t.dir/exec.out // RUN: env IGC_DumpToCustomDir=%t.dir IGC_ShaderDumpEnable=1 %{run} %t.dir/exec.out -// RUN: %if run-mode %{python3 %S/instruction_count.py %t.dir 2969 ZTSZZN11BitonicSort5SolveEPjS0_jENKUlRN4sycl3_V17handlerEE0_clES4_E5Merge.asm%} -// RUN: %if run-mode %{echo "Baseline from driver version 1.3.30872"%} +// RUN: %{run-aux} python3 %S/instruction_count.py %t.dir 2969 ZTSZZN11BitonicSort5SolveEPjS0_jENKUlRN4sycl3_V17handlerEE0_clES4_E5Merge.asm +// RUN: %{run-aux} echo "Baseline from driver version 1.3.30872" #include "../BitonicSortKv2.cpp" diff --git a/sycl/test-e2e/ESIMD/PerformanceTests/Stencil.cpp b/sycl/test-e2e/ESIMD/PerformanceTests/Stencil.cpp index 42d906545207c..ee49ef1039c14 100644 --- a/sycl/test-e2e/ESIMD/PerformanceTests/Stencil.cpp +++ b/sycl/test-e2e/ESIMD/PerformanceTests/Stencil.cpp @@ -10,7 +10,7 @@ // RUN: mkdir -p %t.dir && %{build} -o %t.dir/exec.out // RUN: env IGC_DumpToCustomDir=%t.dir IGC_ShaderDumpEnable=1 %{run} %t.dir/exec.out -// RUN: %if run-mode %{python3 %S/instruction_count.py %t.dir 1699 
ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_E14Stencil_kernel.asm%} -// RUN: %if run-mode %{echo "Baseline from driver version 1.3.29138"%} +// RUN: %{run-aux} python3 %S/instruction_count.py %t.dir 1699 ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_E14Stencil_kernel.asm +// RUN: %{run-aux} echo "Baseline from driver version 1.3.29138" #include "../Stencil.cpp" diff --git a/sycl/test-e2e/ESIMD/PerformanceTests/invoke_simd_smoke.cpp b/sycl/test-e2e/ESIMD/PerformanceTests/invoke_simd_smoke.cpp index 8c148091356fa..e9fed184270ed 100644 --- a/sycl/test-e2e/ESIMD/PerformanceTests/invoke_simd_smoke.cpp +++ b/sycl/test-e2e/ESIMD/PerformanceTests/invoke_simd_smoke.cpp @@ -12,7 +12,7 @@ // RUN: mkdir -p %t.dir && %{build} -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr -o %t.dir/exec.out // RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 IGC_DumpToCustomDir=%t.dir IGC_ShaderDumpEnable=1 %{run} %t.dir/exec.out -// RUN: %if run-mode %{python3 %S/instruction_count.py %t.dir 149 _simd16_entry_0001.asm%} -// RUN: %if run-mode %{echo "Baseline from driver version 1.3.29735"%} +// RUN: %{run-aux} python3 %S/instruction_count.py %t.dir 149 _simd16_entry_0001.asm +// RUN: %{run-aux} echo "Baseline from driver version 1.3.29735" #include "../../InvokeSimd/invoke_simd_smoke.cpp" diff --git a/sycl/test-e2e/ESIMD/PerformanceTests/matrix_transpose.cpp b/sycl/test-e2e/ESIMD/PerformanceTests/matrix_transpose.cpp index 3b5db73fb611c..9a09df6d1d0df 100644 --- a/sycl/test-e2e/ESIMD/PerformanceTests/matrix_transpose.cpp +++ b/sycl/test-e2e/ESIMD/PerformanceTests/matrix_transpose.cpp @@ -10,7 +10,7 @@ // RUN: mkdir -p %t.dir && %{build} -o %t.dir/exec.out // RUN: env IGC_DumpToCustomDir=%t.dir IGC_ShaderDumpEnable=1 %{run} %t.dir/exec.out -// RUN: %if run-mode %{python3 %S/instruction_count.py %t.dir %if igc-dev %{ 1059 %} %else %{ 1116 %} ZTSZZ7runTestjjjRdS_ENKUlRN4sycl3_V17handlerEE_clES3_E3K16.asm%} -// RUN: %if run-mode %{echo "Baseline from driver version 
1.3.30872"%} +// RUN: %{run-aux} python3 %S/instruction_count.py %t.dir %if igc-dev %{ 1059 %} %else %{ 1116 %} ZTSZZ7runTestjjjRdS_ENKUlRN4sycl3_V17handlerEE_clES3_E3K16.asm +// RUN: %{run-aux} echo "Baseline from driver version 1.3.30872" #include "../matrix_transpose.cpp" diff --git a/sycl/test-e2e/ESIMD/PerformanceTests/stencil2.cpp b/sycl/test-e2e/ESIMD/PerformanceTests/stencil2.cpp index e306889c9c4b6..2af6f830b8f3f 100644 --- a/sycl/test-e2e/ESIMD/PerformanceTests/stencil2.cpp +++ b/sycl/test-e2e/ESIMD/PerformanceTests/stencil2.cpp @@ -10,7 +10,7 @@ // RUN: mkdir -p %t.dir && %{build} -o %t.dir/exec.out // RUN: env IGC_DumpToCustomDir=%t.dir IGC_ShaderDumpEnable=1 %{run} %t.dir/exec.out -// RUN: %if run-mode %{python3 %S/instruction_count.py %t.dir 1699 ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_E14Stencil_kernel.asm%} -// RUN: %if run-mode %{echo "Baseline from driver version 1.3.29138"%} +// RUN: %{run-aux} python3 %S/instruction_count.py %t.dir 1699 ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_E14Stencil_kernel.asm +// RUN: %{run-aux} echo "Baseline from driver version 1.3.29138" #include "../stencil2.cpp" diff --git a/sycl/test-e2e/ESIMD/preemption.cpp b/sycl/test-e2e/ESIMD/preemption.cpp index 30c11306c08ca..0e2e11632c3e0 100644 --- a/sycl/test-e2e/ESIMD/preemption.cpp +++ b/sycl/test-e2e/ESIMD/preemption.cpp @@ -9,7 +9,7 @@ // UNSUPPORTED: gpu-intel-dg2 || arch-intel_gpu_pvc // RUN: %{build} -o %t.out // RUN: env IGC_DumpToCustomDir=%t.dump IGC_ShaderDumpEnable=1 %{run} %t.out -// RUN: %if run-mode %{grep enablePreemption %t.dump/*.asm%} +// RUN: %{run-aux} grep enablePreemption %t.dump/*.asm // The test expects to see "enablePreemption" switch in the compilation // switches. It fails if does not find it. 
diff --git a/sycl/test-e2e/KernelAndProgram/trace_kernel_program_cache.cpp b/sycl/test-e2e/KernelAndProgram/trace_kernel_program_cache.cpp index 496f3bb168cc1..7d02f30cafed7 100644 --- a/sycl/test-e2e/KernelAndProgram/trace_kernel_program_cache.cpp +++ b/sycl/test-e2e/KernelAndProgram/trace_kernel_program_cache.cpp @@ -6,12 +6,12 @@ // or SYCL_CACHE_TRACE is set to 0. // RUN: env SYCL_CACHE_IN_MEM=0 %{run} %t.out 2> %t.trace1 -// RUN: %if run-mode %{ FileCheck --allow-empty --input-file=%t.trace1 --implicit-check-not "In-Memory Cache" %s %} +// RUN: %{run-aux} FileCheck --allow-empty --input-file=%t.trace1 --implicit-check-not "In-Memory Cache" %s // RUN: env SYCL_CACHE_TRACE=0 %{run} %t.out 2> %t.trace2 -// RUN: %if run-mode %{ FileCheck --allow-empty --input-file=%t.trace2 --implicit-check-not "In-Memory Cache" %s %} +// RUN: %{run-aux} FileCheck --allow-empty --input-file=%t.trace2 --implicit-check-not "In-Memory Cache" %s // RUN: env SYCL_CACHE_TRACE=2 %{run} %t.out 2> %t.trace3 -// RUN: %if run-mode %{ FileCheck %s --input-file=%t.trace3 --check-prefix=CHECK-CACHE-TRACE %} +// RUN: %{run-aux} FileCheck %s --input-file=%t.trace3 --check-prefix=CHECK-CACHE-TRACE #include diff --git a/sycl/test-e2e/KernelCompiler/kernel_compiler_cache_eviction.cpp b/sycl/test-e2e/KernelCompiler/kernel_compiler_cache_eviction.cpp index 2340a6d96c06e..d113b389c3b91 100644 --- a/sycl/test-e2e/KernelCompiler/kernel_compiler_cache_eviction.cpp +++ b/sycl/test-e2e/KernelCompiler/kernel_compiler_cache_eviction.cpp @@ -18,7 +18,7 @@ // -- Test again, with caching. 
// DEFINE: %{cache_vars} = env SYCL_CACHE_PERSISTENT=1 SYCL_CACHE_TRACE=7 SYCL_CACHE_DIR=%t/cache_dir SYCL_CACHE_MAX_SIZE=30000 -// RUN: %if run-mode %{rm -rf %t/cache_dir%} +// RUN: %{run-aux} rm -rf %t/cache_dir // RUN: %{cache_vars} %{run-unfiltered-devices} %t.out 2>&1 | FileCheck %s --check-prefix=CHECK // CHECK: [Persistent Cache]: enabled diff --git a/sycl/test-e2e/format.py b/sycl/test-e2e/format.py index 849d94e6d1a20..c8f5dbd4e9815 100644 --- a/sycl/test-e2e/format.py +++ b/sycl/test-e2e/format.py @@ -306,7 +306,7 @@ def get_extra_env(sycl_devices): # Filter commands based on testing mode is_run_line = any( i in directive.command - for i in ["%{run}", "%{run-unfiltered-devices}", "%if run-mode"] + for i in ["%{run}", "%{run-unfiltered-devices}", "%{run-aux}"] ) ignore_line_filtering = ( diff --git a/sycl/test-e2e/lit.cfg.py b/sycl/test-e2e/lit.cfg.py index 5010362b366df..55688b5a29928 100644 --- a/sycl/test-e2e/lit.cfg.py +++ b/sycl/test-e2e/lit.cfg.py @@ -78,6 +78,9 @@ else: lit_config.error("Invalid argument for test-mode") +# Dummy substitution to indicate line should be a run line +config.substitutions.append(("%{run-aux}", "")) + # Cleanup environment variables which may affect tests possibly_dangerous_env_vars = [ "COMPILER_PATH", From 5b292cd43b0ab7f49e8ee2183d12d704bcca536e Mon Sep 17 00:00:00 2001 From: David Garcia Orozco Date: Mon, 10 Feb 2025 09:23:39 -0700 Subject: [PATCH 08/13] [SYCL][E2E] Replace `any-device-is-` with `target-` features in `%if` markup (#16889) Using `target-` features is preferable for `%if` statements because this feature will only evaluate to true if a device with the corresponding build target is selected, or if we are in `build-only` and the corresponding triple was selected to build for the test. `any-device-is` on the other hand evaluates to true any time we have the device available on the system, even if it is marked as XFAIL/UNSUPPORTED, and on `build-only` it is never true because we have no devices in that mode. 
--- sycl/test-e2e/AtomicRef/atomic_memory_order_acq_rel.cpp | 2 +- sycl/test-e2e/AtomicRef/atomic_memory_order_seq_cst.cpp | 2 +- sycl/test-e2e/BFloat16/bfloat16_builtins.cpp | 4 ++-- sycl/test-e2e/BFloat16/bfloat16_builtins_cuda_generic.cpp | 4 ++-- sycl/test-e2e/BFloat16/bfloat16_type.cpp | 2 +- sycl/test-e2e/Basic/interop/interop_all_backends.cpp | 8 ++++---- sycl/test-e2e/DeviceLib/cmath_test.cpp | 2 +- sycl/test-e2e/GroupAlgorithm/root_group.cpp | 2 +- sycl/test-e2e/NewOffloadDriver/lit.local.cfg | 2 +- sycl/test-e2e/Reduction/reduction_range_1d_dw.cpp | 2 +- sycl/test-e2e/Reduction/reduction_range_1d_dw_64bit.cpp | 2 +- .../Reduction/reduction_range_1d_reducer_skip.cpp | 2 +- sycl/test-e2e/Reduction/reduction_range_1d_rw.cpp | 2 +- sycl/test-e2e/Reduction/reduction_range_2d_dw.cpp | 2 +- .../Reduction/reduction_range_2d_dw_reducer_skip.cpp | 2 +- sycl/test-e2e/Reduction/reduction_range_2d_rw.cpp | 2 +- sycl/test-e2e/Reduction/reduction_range_3d_dw.cpp | 2 +- sycl/test-e2e/Reduction/reduction_range_3d_rw.cpp | 2 +- .../Reduction/reduction_range_3d_rw_reducer_skip.cpp | 2 +- sycl/test-e2e/Reduction/reduction_range_usm_dw.cpp | 2 +- sycl/test-e2e/USM/P2P/p2p_atomics.cpp | 2 +- .../bindless_images/dx12_interop/read_write_unsampled.cpp | 2 +- .../bindless_images/vulkan_interop/sampled_images.cpp | 2 +- .../bindless_images/vulkan_interop/unsampled_images.cpp | 2 +- sycl/test-e2e/syclcompat/atomic/atomic_class.cpp | 2 +- sycl/test-e2e/syclcompat/atomic/atomic_memory_acq_rel.cpp | 2 +- sycl/test-e2e/syclcompat/math/math_byte_dot_product.cpp | 2 +- 27 files changed, 32 insertions(+), 32 deletions(-) diff --git a/sycl/test-e2e/AtomicRef/atomic_memory_order_acq_rel.cpp b/sycl/test-e2e/AtomicRef/atomic_memory_order_acq_rel.cpp index 2697855754f74..bac155b16b3c4 100644 --- a/sycl/test-e2e/AtomicRef/atomic_memory_order_acq_rel.cpp +++ b/sycl/test-e2e/AtomicRef/atomic_memory_order_acq_rel.cpp @@ -1,4 +1,4 @@ -// RUN: %{build} -O3 -o %t.out %if any-device-is-cuda %{ 
-Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 %} +// RUN: %{build} -O3 -o %t.out %if target-nvidia %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 %} // RUN: %{run} %t.out // NOTE: Tests fetch_add for acquire and release memory ordering. diff --git a/sycl/test-e2e/AtomicRef/atomic_memory_order_seq_cst.cpp b/sycl/test-e2e/AtomicRef/atomic_memory_order_seq_cst.cpp index 686fac9965b48..24bb5ccb19214 100644 --- a/sycl/test-e2e/AtomicRef/atomic_memory_order_seq_cst.cpp +++ b/sycl/test-e2e/AtomicRef/atomic_memory_order_seq_cst.cpp @@ -1,4 +1,4 @@ -// RUN: %{build} -O3 -o %t.out %if any-device-is-cuda %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 %} +// RUN: %{build} -O3 -o %t.out %if target-nvidia %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 %} // RUN: %{run} %t.out // UNSUPPORTED: arch-intel_gpu_bmg_g21 // UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/16924 diff --git a/sycl/test-e2e/BFloat16/bfloat16_builtins.cpp b/sycl/test-e2e/BFloat16/bfloat16_builtins.cpp index 08f0d943a2dd0..d157c68dcaca2 100644 --- a/sycl/test-e2e/BFloat16/bfloat16_builtins.cpp +++ b/sycl/test-e2e/BFloat16/bfloat16_builtins.cpp @@ -5,11 +5,11 @@ // + below sm_80 always uses generic impls // DEFINE: %{mathflags} = %if cl_options %{/clang:-fno-fast-math%} %else %{-fno-fast-math%} -// RUN: %clangxx -fsycl %{sycl_target_opts} %if any-device-is-cuda %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_80 %} %s -o %t.out %{mathflags} +// RUN: %clangxx -fsycl %{sycl_target_opts} %if target-nvidia %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_80 %} %s -o %t.out %{mathflags} // RUN: %{run} %t.out // Test "new" (ABI breaking) for all platforms ( sm_80/native if CUDA ) -// RUN: %if preview-breaking-changes-supported %{ %clangxx -fsycl -fpreview-breaking-changes %{sycl_target_opts} %if any-device-is-cuda %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_80 %} %s -o %t2.out 
%{mathflags} %} +// RUN: %if preview-breaking-changes-supported %{ %clangxx -fsycl -fpreview-breaking-changes %{sycl_target_opts} %if target-nvidia %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_80 %} %s -o %t2.out %{mathflags} %} // RUN: %if preview-breaking-changes-supported %{ %{run} %t2.out %} // Flaky timeout on CPU. Enable when fixed. diff --git a/sycl/test-e2e/BFloat16/bfloat16_builtins_cuda_generic.cpp b/sycl/test-e2e/BFloat16/bfloat16_builtins_cuda_generic.cpp index 719bf4709ae4c..f233d198535ca 100644 --- a/sycl/test-e2e/BFloat16/bfloat16_builtins_cuda_generic.cpp +++ b/sycl/test-e2e/BFloat16/bfloat16_builtins_cuda_generic.cpp @@ -7,8 +7,8 @@ // DEFINE: %{mathflags} = %if cl_options %{/clang:-fno-fast-math%} %else %{-fno-fast-math%} // If CUDA, test "new" again for sm_75/generic -// RUN: %if any-device-is-cuda %{ %if preview-breaking-changes-supported %{ %clangxx -fsycl -fpreview-breaking-changes %{sycl_target_opts} -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_75 %s -o %t3.out %{mathflags} %} %} -// RUN: %if any-device-is-cuda %{ %if preview-breaking-changes-supported %{ %{run} %t3.out %} %} +// RUN: %if target-nvidia %{ %if preview-breaking-changes-supported %{ %clangxx -fsycl -fpreview-breaking-changes %{sycl_target_opts} -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_75 %s -o %t3.out %{mathflags} %} %} +// RUN: %if target-nvidia %{ %if preview-breaking-changes-supported %{ %{run} %t3.out %} %} #include "bfloat16_builtins.hpp" diff --git a/sycl/test-e2e/BFloat16/bfloat16_type.cpp b/sycl/test-e2e/BFloat16/bfloat16_type.cpp index 7e7972f949522..20ac1d311ed9c 100644 --- a/sycl/test-e2e/BFloat16/bfloat16_type.cpp +++ b/sycl/test-e2e/BFloat16/bfloat16_type.cpp @@ -1,4 +1,4 @@ -// RUN: %if any-device-is-cuda %{ %{build} -DUSE_CUDA_SM80=1 -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_80 -o %t.cuda.out %} +// RUN: %if target-nvidia %{ %{build} -DUSE_CUDA_SM80=1 
-Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_80 -o %t.cuda.out %} // RUN: %if cuda %{ %{run} %t.cuda.out %} // RUN: %{build} -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/Basic/interop/interop_all_backends.cpp b/sycl/test-e2e/Basic/interop/interop_all_backends.cpp index 902ab1f1d09b9..c4dec5e6f670c 100644 --- a/sycl/test-e2e/Basic/interop/interop_all_backends.cpp +++ b/sycl/test-e2e/Basic/interop/interop_all_backends.cpp @@ -1,8 +1,8 @@ -// XFAIL: any-device-is-cuda +// XFAIL: target-nvidia // XFAIL-TRACKER: https://github.com/intel/llvm/issues/16070 -// RUN: %if any-device-is-opencl %{ %{build} -o %t-opencl.out %} -// RUN: %if any-device-is-cuda %{ %{build} -isystem %sycl_include -DBUILD_FOR_CUDA -o %t-cuda.out %} -// RUN: %if any-device-is-hip %{ %{build} -DBUILD_FOR_HIP -o %t-hip.out %} +// RUN: %if target-spir %{ %{build} -o %t-opencl.out %} +// RUN: %if target-nvidia %{ %{build} -isystem %sycl_include -DBUILD_FOR_CUDA -o %t-cuda.out %} +// RUN: %if target-amd %{ %{build} -DBUILD_FOR_HIP -o %t-hip.out %} #include #include diff --git a/sycl/test-e2e/DeviceLib/cmath_test.cpp b/sycl/test-e2e/DeviceLib/cmath_test.cpp index 97a92bc2b3885..86fdb3e7c87ab 100644 --- a/sycl/test-e2e/DeviceLib/cmath_test.cpp +++ b/sycl/test-e2e/DeviceLib/cmath_test.cpp @@ -7,7 +7,7 @@ // RUN: %if !gpu %{ %{run} %t2.out %} // // // Check that --fast-math works with cmath funcs for CUDA -// RUN: %if any-device-is-cuda %{ %{build} -Wno-nan-infinity-disabled -fno-builtin %{mathflags} -o %t3.out -ffast-math -DSYCL_E2E_FASTMATH %} +// RUN: %if target-nvidia %{ %clangxx -fsycl -fsycl-targets=nvptx64-nvidia-cuda %s -Wno-nan-infinity-disabled -fno-builtin %{mathflags} -o %t3.out -ffast-math -DSYCL_E2E_FASTMATH %} // RUN: %if cuda %{ %{run} %t3.out %} #include "math_utils.hpp" diff --git a/sycl/test-e2e/GroupAlgorithm/root_group.cpp b/sycl/test-e2e/GroupAlgorithm/root_group.cpp index 257b5a4e4457f..c1fdeb58acec7 100644 --- a/sycl/test-e2e/GroupAlgorithm/root_group.cpp 
+++ b/sycl/test-e2e/GroupAlgorithm/root_group.cpp @@ -5,7 +5,7 @@ // TODO: Currently using the -Wno-deprecated-declarations flag due to issue // https://github.com/intel/llvm/issues/16451. Rewrite testRootGroup() amd // remove the flag once the issue is resolved. -// RUN: %{build} -I . -o %t.out -Wno-deprecated-declarations %if any-device-is-cuda %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 %} +// RUN: %{build} -I . -o %t.out -Wno-deprecated-declarations %if target-nvidia %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 %} // RUN: %{run} %t.out // Disabled temporarily while investigation into the failure is ongoing. diff --git a/sycl/test-e2e/NewOffloadDriver/lit.local.cfg b/sycl/test-e2e/NewOffloadDriver/lit.local.cfg index 1e48e3bd2ad52..892e3cddea99e 100644 --- a/sycl/test-e2e/NewOffloadDriver/lit.local.cfg +++ b/sycl/test-e2e/NewOffloadDriver/lit.local.cfg @@ -3,5 +3,5 @@ import platform config.unsupported_features += ['accelerator'] config.substitutions.append( - ("%{embed-ir}", "%if any-device-is-hip || any-device-is-cuda %{ -fsycl-embed-ir %}") + ("%{embed-ir}", "%if target-amd || target-nvidia %{ -fsycl-embed-ir %}") ) diff --git a/sycl/test-e2e/Reduction/reduction_range_1d_dw.cpp b/sycl/test-e2e/Reduction/reduction_range_1d_dw.cpp index 4906b91350259..9e2f59c9ae3e8 100644 --- a/sycl/test-e2e/Reduction/reduction_range_1d_dw.cpp +++ b/sycl/test-e2e/Reduction/reduction_range_1d_dw.cpp @@ -1,4 +1,4 @@ -// RUN: %{build} -DENABLE_64_BIT=false -o %t.out %if any-device-is-cuda %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 %} +// RUN: %{build} -DENABLE_64_BIT=false -o %t.out %if target-nvidia %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 %} // RUN: %{run} %t.out // Windows doesn't yet have full shutdown(). 
diff --git a/sycl/test-e2e/Reduction/reduction_range_1d_dw_64bit.cpp b/sycl/test-e2e/Reduction/reduction_range_1d_dw_64bit.cpp index fc1f83d558be7..e1f2245485f1a 100644 --- a/sycl/test-e2e/Reduction/reduction_range_1d_dw_64bit.cpp +++ b/sycl/test-e2e/Reduction/reduction_range_1d_dw_64bit.cpp @@ -1,4 +1,4 @@ -// RUN: %{build} -DENABLE_64_BIT=true -o %t.out %if any-device-is-cuda %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 %} +// RUN: %{build} -DENABLE_64_BIT=true -o %t.out %if target-nvidia %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 %} // RUN: %{run} %t.out // Windows doesn't yet have full shutdown(). diff --git a/sycl/test-e2e/Reduction/reduction_range_1d_reducer_skip.cpp b/sycl/test-e2e/Reduction/reduction_range_1d_reducer_skip.cpp index bb8c56d1a8c2d..3f74339a6e640 100644 --- a/sycl/test-e2e/Reduction/reduction_range_1d_reducer_skip.cpp +++ b/sycl/test-e2e/Reduction/reduction_range_1d_reducer_skip.cpp @@ -1,4 +1,4 @@ -// RUN: %{build} -o %t.out %if any-device-is-cuda %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 %} +// RUN: %{build} -o %t.out %if target-nvidia %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 %} // RUN: %{run} %t.out // Windows doesn't yet have full shutdown(). diff --git a/sycl/test-e2e/Reduction/reduction_range_1d_rw.cpp b/sycl/test-e2e/Reduction/reduction_range_1d_rw.cpp index c8eb129a06ad4..816611ea3be44 100644 --- a/sycl/test-e2e/Reduction/reduction_range_1d_rw.cpp +++ b/sycl/test-e2e/Reduction/reduction_range_1d_rw.cpp @@ -1,4 +1,4 @@ -// RUN: %{build} -o %t.out %if any-device-is-cuda %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 %} +// RUN: %{build} -o %t.out %if target-nvidia %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 %} // RUN: %{run} %t.out // Windows doesn't yet have full shutdown(). 
diff --git a/sycl/test-e2e/Reduction/reduction_range_2d_dw.cpp b/sycl/test-e2e/Reduction/reduction_range_2d_dw.cpp index e59061f6b88a2..bede263e59ad5 100644 --- a/sycl/test-e2e/Reduction/reduction_range_2d_dw.cpp +++ b/sycl/test-e2e/Reduction/reduction_range_2d_dw.cpp @@ -1,4 +1,4 @@ -// RUN: %{build} -o %t.out %if any-device-is-cuda %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 %} +// RUN: %{build} -o %t.out %if target-nvidia %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 %} // RUN: %{run} %t.out // Windows doesn't yet have full shutdown(). diff --git a/sycl/test-e2e/Reduction/reduction_range_2d_dw_reducer_skip.cpp b/sycl/test-e2e/Reduction/reduction_range_2d_dw_reducer_skip.cpp index 4a2c7fd24ac00..c58010ab666d8 100644 --- a/sycl/test-e2e/Reduction/reduction_range_2d_dw_reducer_skip.cpp +++ b/sycl/test-e2e/Reduction/reduction_range_2d_dw_reducer_skip.cpp @@ -1,4 +1,4 @@ -// RUN: %{build} -o %t.out %if any-device-is-cuda %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 %} +// RUN: %{build} -o %t.out %if target-nvidia %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 %} // RUN: %{run} %t.out // Windows doesn't yet have full shutdown(). diff --git a/sycl/test-e2e/Reduction/reduction_range_2d_rw.cpp b/sycl/test-e2e/Reduction/reduction_range_2d_rw.cpp index c1854ad044146..6fe6ae756a87a 100644 --- a/sycl/test-e2e/Reduction/reduction_range_2d_rw.cpp +++ b/sycl/test-e2e/Reduction/reduction_range_2d_rw.cpp @@ -1,4 +1,4 @@ -// RUN: %{build} -o %t.out %if any-device-is-cuda %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 %} +// RUN: %{build} -o %t.out %if target-nvidia %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 %} // RUN: %{run} %t.out // Windows doesn't yet have full shutdown(). 
diff --git a/sycl/test-e2e/Reduction/reduction_range_3d_dw.cpp b/sycl/test-e2e/Reduction/reduction_range_3d_dw.cpp index 735adb82df87d..a832c6ab30046 100644 --- a/sycl/test-e2e/Reduction/reduction_range_3d_dw.cpp +++ b/sycl/test-e2e/Reduction/reduction_range_3d_dw.cpp @@ -1,4 +1,4 @@ -// RUN: %{build} -o %t.out %if any-device-is-cuda %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 %} +// RUN: %{build} -o %t.out %if target-nvidia %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 %} // RUN: %{run} %t.out // Windows doesn't yet have full shutdown(). diff --git a/sycl/test-e2e/Reduction/reduction_range_3d_rw.cpp b/sycl/test-e2e/Reduction/reduction_range_3d_rw.cpp index 015fa167647db..dfc2366c3cb7e 100644 --- a/sycl/test-e2e/Reduction/reduction_range_3d_rw.cpp +++ b/sycl/test-e2e/Reduction/reduction_range_3d_rw.cpp @@ -1,4 +1,4 @@ -// RUN: %{build} -o %t.out %if any-device-is-cuda %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 %} +// RUN: %{build} -o %t.out %if target-nvidia %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 %} // RUN: %{run} %t.out // Windows doesn't yet have full shutdown(). diff --git a/sycl/test-e2e/Reduction/reduction_range_3d_rw_reducer_skip.cpp b/sycl/test-e2e/Reduction/reduction_range_3d_rw_reducer_skip.cpp index 85960235d2e3c..31ad10d456011 100644 --- a/sycl/test-e2e/Reduction/reduction_range_3d_rw_reducer_skip.cpp +++ b/sycl/test-e2e/Reduction/reduction_range_3d_rw_reducer_skip.cpp @@ -1,4 +1,4 @@ -// RUN: %{build} -o %t.out %if any-device-is-cuda %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 %} +// RUN: %{build} -o %t.out %if target-nvidia %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 %} // RUN: %{run} %t.out // Windows doesn't yet have full shutdown(). 
diff --git a/sycl/test-e2e/Reduction/reduction_range_usm_dw.cpp b/sycl/test-e2e/Reduction/reduction_range_usm_dw.cpp index 9c9f366f6a588..add12df70e305 100644 --- a/sycl/test-e2e/Reduction/reduction_range_usm_dw.cpp +++ b/sycl/test-e2e/Reduction/reduction_range_usm_dw.cpp @@ -1,4 +1,4 @@ -// RUN: %{build} -o %t.out %if any-device-is-cuda %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 %} +// RUN: %{build} -o %t.out %if target-nvidia %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 %} // RUN: %{run} %t.out // Windows doesn't yet have full shutdown(). diff --git a/sycl/test-e2e/USM/P2P/p2p_atomics.cpp b/sycl/test-e2e/USM/P2P/p2p_atomics.cpp index b81405f316e7c..1feb4c4b01163 100644 --- a/sycl/test-e2e/USM/P2P/p2p_atomics.cpp +++ b/sycl/test-e2e/USM/P2P/p2p_atomics.cpp @@ -1,5 +1,5 @@ // REQUIRES: cuda || hip || level_zero -// RUN: %{build} %if any-device-is-cuda %{ -Xsycl-target-backend --cuda-gpu-arch=sm_61 %} -o %t.out +// RUN: %{build} %if target-nvidia %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_61 %} -o %t.out // RUN: %{run} %t.out #include diff --git a/sycl/test-e2e/bindless_images/dx12_interop/read_write_unsampled.cpp b/sycl/test-e2e/bindless_images/dx12_interop/read_write_unsampled.cpp index 1aeb63636cc71..e00fd26271372 100644 --- a/sycl/test-e2e/bindless_images/dx12_interop/read_write_unsampled.cpp +++ b/sycl/test-e2e/bindless_images/dx12_interop/read_write_unsampled.cpp @@ -2,7 +2,7 @@ // REQUIRES: windows // DEFINE: %{link-flags}=%if cl_options %{ /clang:-ld3d12 /clang:-ldxgi /clang:-ldxguid %} %else %{ -ld3d12 -ldxgi -ldxguid %} -// RUN: %{build} %{link-flags} -o %t.out %if any-device-is-level_zero %{ -DDISABLE_UNORM_TESTS %} +// RUN: %{build} %{link-flags} -o %t.out %if target-spir %{ -DDISABLE_UNORM_TESTS %} // RUN: %{run-unfiltered-devices} env NEOReadDebugKeys=1 UseBindlessMode=1 UseExternalAllocatorForSshAndDsh=1 %t.out #pragma clang diagnostic ignored "-Waddress-of-temporary" diff --git 
a/sycl/test-e2e/bindless_images/vulkan_interop/sampled_images.cpp b/sycl/test-e2e/bindless_images/vulkan_interop/sampled_images.cpp index 46335e2b1d850..6cd2eec98ca4e 100644 --- a/sycl/test-e2e/bindless_images/vulkan_interop/sampled_images.cpp +++ b/sycl/test-e2e/bindless_images/vulkan_interop/sampled_images.cpp @@ -1,7 +1,7 @@ // REQUIRES: aspect-ext_oneapi_external_memory_import || (windows && level_zero && aspect-ext_oneapi_bindless_images) // REQUIRES: vulkan -// RUN: %{build} %link-vulkan -o %t.out %if any-device-is-level_zero %{ -Wno-ignored-attributes -DENABLE_LINEAR_TILING -DTEST_L0_SUPPORTED_VK_FORMAT %} +// RUN: %{build} %link-vulkan -o %t.out %if target-spir %{ -Wno-ignored-attributes -DENABLE_LINEAR_TILING -DTEST_L0_SUPPORTED_VK_FORMAT %} // RUN: %{run} env NEOReadDebugKeys=1 UseBindlessMode=1 UseExternalAllocatorForSshAndDsh=1 %t.out // Uncomment to print additional test information diff --git a/sycl/test-e2e/bindless_images/vulkan_interop/unsampled_images.cpp b/sycl/test-e2e/bindless_images/vulkan_interop/unsampled_images.cpp index dccb3a2828aec..8dd0b4f3f8243 100644 --- a/sycl/test-e2e/bindless_images/vulkan_interop/unsampled_images.cpp +++ b/sycl/test-e2e/bindless_images/vulkan_interop/unsampled_images.cpp @@ -1,7 +1,7 @@ // REQUIRES: aspect-ext_oneapi_external_memory_import || (windows && level_zero && aspect-ext_oneapi_bindless_images) // REQUIRES: vulkan -// RUN: %{build} %link-vulkan -o %t.out %if any-device-is-level_zero %{ -Wno-ignored-attributes -DTEST_L0_SUPPORTED_VK_FORMAT %} +// RUN: %{build} %link-vulkan -o %t.out %if target-spir %{ -Wno-ignored-attributes -DTEST_L0_SUPPORTED_VK_FORMAT %} // RUN: %{run} env NEOReadDebugKeys=1 UseBindlessMode=1 UseExternalAllocatorForSshAndDsh=1 %t.out // Uncomment to print additional test information diff --git a/sycl/test-e2e/syclcompat/atomic/atomic_class.cpp b/sycl/test-e2e/syclcompat/atomic/atomic_class.cpp index 123f5a2c73dbd..f43c60d8a3589 100644 --- 
a/sycl/test-e2e/syclcompat/atomic/atomic_class.cpp +++ b/sycl/test-e2e/syclcompat/atomic/atomic_class.cpp @@ -32,7 +32,7 @@ // UNSUPPORTED: hip || (windows && level_zero) -// RUN: %{build} %if any-device-is-cuda %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 %} -o %t.out +// RUN: %{build} %if target-nvidia %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 %} -o %t.out // RUN: %{run} %t.out #include diff --git a/sycl/test-e2e/syclcompat/atomic/atomic_memory_acq_rel.cpp b/sycl/test-e2e/syclcompat/atomic/atomic_memory_acq_rel.cpp index 609652a58b17d..b3c48d0867b9f 100644 --- a/sycl/test-e2e/syclcompat/atomic/atomic_memory_acq_rel.cpp +++ b/sycl/test-e2e/syclcompat/atomic/atomic_memory_acq_rel.cpp @@ -32,7 +32,7 @@ // UNSUPPORTED: hip -// RUN: %{build} %if any-device-is-cuda %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 %} -o %t.out +// RUN: %{build} %if target-nvidia %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 %} -o %t.out // RUN: %{run} %t.out #include diff --git a/sycl/test-e2e/syclcompat/math/math_byte_dot_product.cpp b/sycl/test-e2e/syclcompat/math/math_byte_dot_product.cpp index 41421ee2b9d85..a53408157ecf6 100644 --- a/sycl/test-e2e/syclcompat/math/math_byte_dot_product.cpp +++ b/sycl/test-e2e/syclcompat/math/math_byte_dot_product.cpp @@ -29,7 +29,7 @@ // // ===---------------------------------------------------------------------===// -// RUN: %{build} %if any-device-is-cuda %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_61 %} -o %t.out +// RUN: %{build} %if target-nvidia %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_61 %} -o %t.out // RUN: %{run} %t.out #include From 4ca4115e376a08860bbad4dc5cc192ccf1dd9315 Mon Sep 17 00:00:00 2001 From: Alexey Bader Date: Mon, 10 Feb 2025 19:27:45 +0200 Subject: [PATCH 09/13] [clang-linker-wrapper][NFC] Remove dead code (#16879) InputFiles variable defined in the function is not used. 
If statements below define local scope variables with the same name. --- clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp | 8 -------- 1 file changed, 8 deletions(-) diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp index 735ecb55fc80c..5fe6f551e02f7 100644 --- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp +++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp @@ -2362,14 +2362,6 @@ Expected> linkAndWrapDeviceFiles( HasNonSYCLOffloadKinds = true; } - // Write any remaining device inputs to an output file. - SmallVector InputFiles; - for (const OffloadFile &File : Input) { - auto FileNameOrErr = writeOffloadFile(File); - if (!FileNameOrErr) - return FileNameOrErr.takeError(); - InputFiles.emplace_back(*FileNameOrErr); - } if (HasSYCLOffloadKind) { SmallVector InputFiles; // Write device inputs to an output file for the linker. From b8a2ab74c0e24adb1b3d0bf8f02188ab32b8e08e Mon Sep 17 00:00:00 2001 From: David Garcia Orozco Date: Mon, 10 Feb 2025 10:31:28 -0700 Subject: [PATCH 10/13] [SYCL][E2E] Use `target-` requirements in tests that fail to build for all triples (#16895) These tests fail building for all triples, so this pr adds `target-` requirements to avoid compiling for those triples while in `build-only`. Previously when in `full` testing mode these tests were either marked as unsupported for the backend corresponding to a given triple, or required a feature that implicitly made some triples unsupported (i.e., `accelerator`, or `sg-8`). 
--- sycl/test-e2e/Basic/fpga_tests/fpga_pipes.cpp | 2 +- sycl/test-e2e/Basic/fpga_tests/fpga_pipes_legacy_ns.cpp | 2 +- sycl/test-e2e/Basic/fpga_tests/fpga_pipes_mixed_usage.cpp | 2 +- sycl/test-e2e/Basic/multisource_spv_obj.cpp | 2 +- sycl/test-e2e/Basic/stream/blocking_pipes_and_stream.cpp | 2 +- .../ClusterLaunch/cluster_launch_enqueue_functions.cpp | 2 +- sycl/test-e2e/ClusterLaunch/cluster_launch_parallel_for.cpp | 2 +- .../ClusterLaunch/enqueueLaunchCustom_check_event_deps.cpp | 2 +- .../DeviceImageDependencies/NewOffloadDriver/dynamic.cpp | 2 +- .../NewOffloadDriver/free_function_kernels.cpp | 4 ++-- .../NewOffloadDriver/math_device_lib.cpp | 2 +- .../DeviceImageDependencies/NewOffloadDriver/objects.cpp | 2 +- .../NewOffloadDriver/singleDynamicLibrary.cpp | 2 +- sycl/test-e2e/DeviceImageDependencies/dynamic.cpp | 2 +- sycl/test-e2e/DeviceImageDependencies/math_device_lib.cpp | 2 +- sycl/test-e2e/DeviceImageDependencies/objects.cpp | 2 +- .../test-e2e/DeviceImageDependencies/singleDynamicLibrary.cpp | 2 +- .../GroupAlgorithm/SYCL2020/group_sort/array_input_sort.cpp | 4 ++++ .../SYCL2020/group_sort/group_and_joint_sort.cpp | 4 ++++ .../SYCL2020/group_sort/key_value_array_input_sort.cpp | 4 ++++ .../GroupAlgorithm/SYCL2020/group_sort/key_value_sort.cpp | 4 ++++ sycl/test-e2e/KernelAndProgram/build-log.cpp | 2 +- sycl/test-e2e/KernelAndProgram/cache-build-result.cpp | 2 +- sycl/test-e2e/KernelAndProgram/level-zero-link-flags.cpp | 2 +- sycl/test-e2e/KernelAndProgram/undefined-symbol.cpp | 2 +- sycl/test-e2e/LLVMIntrinsicLowering/bitreverse.cpp | 2 +- sycl/test-e2e/LLVMIntrinsicLowering/sub_byte_bitreverse.cpp | 2 +- sycl/test-e2e/Regression/acos.cpp | 2 +- sycl/test-e2e/SPVDumpUse/basic.cpp | 2 +- sycl/test-e2e/SPVDumpUse/kernel_bundle.cpp | 2 +- sycl/test-e2e/SubGroup/attributes.cpp | 2 +- sycl/test-e2e/VirtualFunctions/lit.local.cfg | 2 +- 32 files changed, 45 insertions(+), 29 deletions(-) diff --git a/sycl/test-e2e/Basic/fpga_tests/fpga_pipes.cpp 
b/sycl/test-e2e/Basic/fpga_tests/fpga_pipes.cpp index 4932284647b0d..3865b6b3ed0cd 100644 --- a/sycl/test-e2e/Basic/fpga_tests/fpga_pipes.cpp +++ b/sycl/test-e2e/Basic/fpga_tests/fpga_pipes.cpp @@ -1,4 +1,4 @@ -// REQUIRES: accelerator +// REQUIRES: target-spir, accelerator // https://github.com/intel/llvm/issues/14308 // Temporarily re-enabled for testing purposes. // If you are facing issues with this test, please leave a comment in: diff --git a/sycl/test-e2e/Basic/fpga_tests/fpga_pipes_legacy_ns.cpp b/sycl/test-e2e/Basic/fpga_tests/fpga_pipes_legacy_ns.cpp index 2ed8d6265aa5f..85d19c3707662 100644 --- a/sycl/test-e2e/Basic/fpga_tests/fpga_pipes_legacy_ns.cpp +++ b/sycl/test-e2e/Basic/fpga_tests/fpga_pipes_legacy_ns.cpp @@ -1,4 +1,4 @@ -// REQUIRES: accelerator +// REQUIRES: target-spir, accelerator // RUN: %{build} -o %t.out // RUN: %{run} %t.out //==-------- fpga_pipes_legacy_ns.cpp - SYCL FPGA pipes test ---------------==// diff --git a/sycl/test-e2e/Basic/fpga_tests/fpga_pipes_mixed_usage.cpp b/sycl/test-e2e/Basic/fpga_tests/fpga_pipes_mixed_usage.cpp index 060de2e07adb1..6404b6f092e09 100644 --- a/sycl/test-e2e/Basic/fpga_tests/fpga_pipes_mixed_usage.cpp +++ b/sycl/test-e2e/Basic/fpga_tests/fpga_pipes_mixed_usage.cpp @@ -5,7 +5,7 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// REQUIRES: accelerator +// REQUIRES: target-spir, accelerator // RUN: %{build} -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/Basic/multisource_spv_obj.cpp b/sycl/test-e2e/Basic/multisource_spv_obj.cpp index 415c07e67f013..29d7f4559e2c5 100644 --- a/sycl/test-e2e/Basic/multisource_spv_obj.cpp +++ b/sycl/test-e2e/Basic/multisource_spv_obj.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: cuda || hip +// UNSUPPORTED: target-nvidia || target-amd // // Separate kernel sources and host code sources // RUN: 
%{build} -fsycl-device-obj=spirv -c -o %t.kernel.o -DINIT_KERNEL -DCALC_KERNEL diff --git a/sycl/test-e2e/Basic/stream/blocking_pipes_and_stream.cpp b/sycl/test-e2e/Basic/stream/blocking_pipes_and_stream.cpp index d61f1f69234a3..340d48f34fb32 100644 --- a/sycl/test-e2e/Basic/stream/blocking_pipes_and_stream.cpp +++ b/sycl/test-e2e/Basic/stream/blocking_pipes_and_stream.cpp @@ -1,4 +1,4 @@ -// REQUIRES: accelerator +// REQUIRES: target-spir, accelerator // RUN: %{build} -o %t.out // RUN: %{run} %t.out | FileCheck %s diff --git a/sycl/test-e2e/ClusterLaunch/cluster_launch_enqueue_functions.cpp b/sycl/test-e2e/ClusterLaunch/cluster_launch_enqueue_functions.cpp index e639260be5714..54c392410175f 100644 --- a/sycl/test-e2e/ClusterLaunch/cluster_launch_enqueue_functions.cpp +++ b/sycl/test-e2e/ClusterLaunch/cluster_launch_enqueue_functions.cpp @@ -1,6 +1,6 @@ // Tests whether or not cluster launch was successful, with the correct ranges // that were passed via enqueue functions extension -// REQUIRES: aspect-ext_oneapi_cuda_cluster_group +// REQUIRES: target-nvidia, aspect-ext_oneapi_cuda_cluster_group // RUN: %{build} -Xsycl-target-backend --cuda-gpu-arch=sm_90 -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/ClusterLaunch/cluster_launch_parallel_for.cpp b/sycl/test-e2e/ClusterLaunch/cluster_launch_parallel_for.cpp index 01db70b11464a..5654af717b461 100644 --- a/sycl/test-e2e/ClusterLaunch/cluster_launch_parallel_for.cpp +++ b/sycl/test-e2e/ClusterLaunch/cluster_launch_parallel_for.cpp @@ -1,6 +1,6 @@ // Tests whether or not cluster launch was successful, with the correct ranges // that were passed via parallel for overload -// REQUIRES: aspect-ext_oneapi_cuda_cluster_group +// REQUIRES: target-nvidia, aspect-ext_oneapi_cuda_cluster_group // RUN: %{build} -Xsycl-target-backend --cuda-gpu-arch=sm_90 -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/ClusterLaunch/enqueueLaunchCustom_check_event_deps.cpp 
b/sycl/test-e2e/ClusterLaunch/enqueueLaunchCustom_check_event_deps.cpp index 0460defa72104..e15d361a5ca3e 100644 --- a/sycl/test-e2e/ClusterLaunch/enqueueLaunchCustom_check_event_deps.cpp +++ b/sycl/test-e2e/ClusterLaunch/enqueueLaunchCustom_check_event_deps.cpp @@ -1,6 +1,6 @@ // Checks whether or not event Dependencies are honored by // urEnqueueKernelLaunchCustomExp -// REQUIRES: aspect-ext_oneapi_cuda_cluster_group +// REQUIRES: target-nvidia, aspect-ext_oneapi_cuda_cluster_group // RUN: %{build} -Xsycl-target-backend --cuda-gpu-arch=sm_90 -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/dynamic.cpp b/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/dynamic.cpp index b7c93167b7fb7..1125deb7b8a19 100644 --- a/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/dynamic.cpp +++ b/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/dynamic.cpp @@ -1,6 +1,6 @@ // Test -fsycl-allow-device-image-dependencies with dynamic libraries. -// UNSUPPORTED: cuda || hip +// UNSUPPORTED: target-nvidia || target-amd // UNSUPPORTED-INTENDED: Not implemented yet for Nvidia/AMD backends. // DEFINE: %{dynamic_lib_options} = -fsycl %fPIC %shared_lib -fsycl-allow-device-image-dependencies -I %S/Inputs %if windows %{-DMAKE_DLL %} diff --git a/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/free_function_kernels.cpp b/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/free_function_kernels.cpp index fda36e3af7b6b..a0aabc1cfcc8b 100644 --- a/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/free_function_kernels.cpp +++ b/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/free_function_kernels.cpp @@ -5,10 +5,10 @@ // RUN: %{run} %t.out // The name mangling for free function kernels currently does not work with PTX. -// UNSUPPORTED: cuda +// UNSUPPORTED: target-nvidia // UNSUPPORTED-INTENDED: Not implemented yet for Nvidia/AMD backends. 
-// XFAIL: hip +// XFAIL: target-amd // XFAIL-TRACKER: https://github.com/intel/llvm/issues/15742 // XFAIL: spirv-backend && run-mode diff --git a/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/math_device_lib.cpp b/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/math_device_lib.cpp index e188871acc7cd..55b536babdce0 100644 --- a/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/math_device_lib.cpp +++ b/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/math_device_lib.cpp @@ -1,5 +1,5 @@ // REQUIRES: aspect-fp64 -// UNSUPPORTED: hip || cuda +// UNSUPPORTED: target-amd || target-nvidia // UNSUPPORTED-INTENDED: Not implemented yet for Nvidia/AMD backends. // DEFINE: %{mathflags} = %if cl_options %{/clang:-fno-fast-math%} %else %{-fno-fast-math%} diff --git a/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/objects.cpp b/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/objects.cpp index ea1875dafe52b..b83b73736c496 100644 --- a/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/objects.cpp +++ b/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/objects.cpp @@ -1,6 +1,6 @@ // Test -fsycl-allow-device-image-dependencies with objects. -// UNSUPPORTED: cuda || hip +// UNSUPPORTED: target-nvidia || target-amd // UNSUPPORTED-INTENDED: Not implemented yet for Nvidia/AMD backends. // RUN: %clangxx --offload-new-driver -fsycl %S/Inputs/a.cpp -I %S/Inputs -c -o %t_a.o diff --git a/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/singleDynamicLibrary.cpp b/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/singleDynamicLibrary.cpp index e8de04cf82eb5..38f14f01a41a2 100644 --- a/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/singleDynamicLibrary.cpp +++ b/sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/singleDynamicLibrary.cpp @@ -1,7 +1,7 @@ // Test -fsycl-allow-device-image-dependencies with a single dynamic library on // Windows and Linux. 
-// UNSUPPORTED: cuda || hip +// UNSUPPORTED: target-nvidia || target-amd // UNSUPPORTED-INTENDED: Not implemented yet for Nvidia/AMD backends. // RUN: %clangxx --offload-new-driver -fsycl %fPIC %shared_lib -fsycl-allow-device-image-dependencies -I %S/Inputs \ diff --git a/sycl/test-e2e/DeviceImageDependencies/dynamic.cpp b/sycl/test-e2e/DeviceImageDependencies/dynamic.cpp index 1bdaf3b1d6270..05d176ced16c8 100644 --- a/sycl/test-e2e/DeviceImageDependencies/dynamic.cpp +++ b/sycl/test-e2e/DeviceImageDependencies/dynamic.cpp @@ -1,6 +1,6 @@ // Test -fsycl-allow-device-image-dependencies with dynamic libraries. -// UNSUPPORTED: cuda || hip +// UNSUPPORTED: target-nvidia || target-amd // DEFINE: %{dynamic_lib_options} = -fsycl %fPIC %shared_lib -fsycl-allow-device-image-dependencies -I %S/Inputs %if windows %{-DMAKE_DLL %} // DEFINE: %{dynamic_lib_suffix} = %if windows %{dll%} %else %{so%} diff --git a/sycl/test-e2e/DeviceImageDependencies/math_device_lib.cpp b/sycl/test-e2e/DeviceImageDependencies/math_device_lib.cpp index 440935e31e3eb..a8d32703bd16a 100644 --- a/sycl/test-e2e/DeviceImageDependencies/math_device_lib.cpp +++ b/sycl/test-e2e/DeviceImageDependencies/math_device_lib.cpp @@ -1,5 +1,5 @@ // REQUIRES: aspect-fp64 -// UNSUPPORTED: hip || cuda +// UNSUPPORTED: target-amd || target-nvidia // DEFINE: %{mathflags} = %if cl_options %{/clang:-fno-fast-math%} %else %{-fno-fast-math%} diff --git a/sycl/test-e2e/DeviceImageDependencies/objects.cpp b/sycl/test-e2e/DeviceImageDependencies/objects.cpp index eea085dc9b905..8d2daf9228c2d 100644 --- a/sycl/test-e2e/DeviceImageDependencies/objects.cpp +++ b/sycl/test-e2e/DeviceImageDependencies/objects.cpp @@ -1,6 +1,6 @@ // Test -fsycl-allow-device-image-dependencies with objects. 
-// UNSUPPORTED: cuda || hip +// UNSUPPORTED: target-nvidia || target-amd // RUN: %clangxx -fsycl %S/Inputs/a.cpp -I %S/Inputs -c -o %t_a.o // RUN: %clangxx -fsycl %S/Inputs/b.cpp -I %S/Inputs -c -o %t_b.o diff --git a/sycl/test-e2e/DeviceImageDependencies/singleDynamicLibrary.cpp b/sycl/test-e2e/DeviceImageDependencies/singleDynamicLibrary.cpp index f0a65b6f1f056..9d72eab7ee8e3 100644 --- a/sycl/test-e2e/DeviceImageDependencies/singleDynamicLibrary.cpp +++ b/sycl/test-e2e/DeviceImageDependencies/singleDynamicLibrary.cpp @@ -1,7 +1,7 @@ // Test -fsycl-allow-device-image-dependencies with a single dynamic library on Windows // and Linux. -// UNSUPPORTED: cuda || hip +// UNSUPPORTED: target-nvidia || target-amd // RUN: %clangxx -fsycl %fPIC %shared_lib -fsycl-allow-device-image-dependencies -I %S/Inputs \ // RUN: %S/Inputs/a.cpp \ diff --git a/sycl/test-e2e/GroupAlgorithm/SYCL2020/group_sort/array_input_sort.cpp b/sycl/test-e2e/GroupAlgorithm/SYCL2020/group_sort/array_input_sort.cpp index 3adfc92ccf256..ae6c374c97535 100644 --- a/sycl/test-e2e/GroupAlgorithm/SYCL2020/group_sort/array_input_sort.cpp +++ b/sycl/test-e2e/GroupAlgorithm/SYCL2020/group_sort/array_input_sort.cpp @@ -1,4 +1,8 @@ // REQUIRES: sg-8 +// UNSUPPORTED: target-nvidia || target-amd +// UNSUPPORTED-INTENDED: subgroup size requirement implicitly make nvptx/amdgcn +// not supported + // RUN: %{build} -fsycl-device-code-split=per_kernel -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/GroupAlgorithm/SYCL2020/group_sort/group_and_joint_sort.cpp b/sycl/test-e2e/GroupAlgorithm/SYCL2020/group_sort/group_and_joint_sort.cpp index ab7ff36dd7c19..f9988030bcb61 100644 --- a/sycl/test-e2e/GroupAlgorithm/SYCL2020/group_sort/group_and_joint_sort.cpp +++ b/sycl/test-e2e/GroupAlgorithm/SYCL2020/group_sort/group_and_joint_sort.cpp @@ -1,4 +1,8 @@ // REQUIRES: sg-8 +// UNSUPPORTED: target-nvidia || target-amd +// UNSUPPORTED-INTENDED: subgroup size requirement implicitly make nvptx/amdgcn +// not supported + 
// RUN: %{build} -fsycl-device-code-split=per_kernel -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/GroupAlgorithm/SYCL2020/group_sort/key_value_array_input_sort.cpp b/sycl/test-e2e/GroupAlgorithm/SYCL2020/group_sort/key_value_array_input_sort.cpp index 0b415f878e85f..1a6e81829511b 100644 --- a/sycl/test-e2e/GroupAlgorithm/SYCL2020/group_sort/key_value_array_input_sort.cpp +++ b/sycl/test-e2e/GroupAlgorithm/SYCL2020/group_sort/key_value_array_input_sort.cpp @@ -1,4 +1,8 @@ // REQUIRES: sg-8 +// UNSUPPORTED: target-nvidia || target-amd +// UNSUPPORTED-INTENDED: subgroup size requirement implicitly make nvptx/amdgcn +// not supported + // RUN: %{build} -fsycl-device-code-split=per_kernel -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/GroupAlgorithm/SYCL2020/group_sort/key_value_sort.cpp b/sycl/test-e2e/GroupAlgorithm/SYCL2020/group_sort/key_value_sort.cpp index b2347d9b6de6e..be162c09f7e4e 100644 --- a/sycl/test-e2e/GroupAlgorithm/SYCL2020/group_sort/key_value_sort.cpp +++ b/sycl/test-e2e/GroupAlgorithm/SYCL2020/group_sort/key_value_sort.cpp @@ -1,4 +1,8 @@ // REQUIRES: sg-8 +// UNSUPPORTED: target-nvidia || target-amd +// UNSUPPORTED-INTENDED: subgroup size requirement implicitly make nvptx/amdgcn +// not supported + // RUN: %{build} -fsycl-device-code-split=per_kernel -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/KernelAndProgram/build-log.cpp b/sycl/test-e2e/KernelAndProgram/build-log.cpp index dc6c7bc97cd91..ae9903163f626 100644 --- a/sycl/test-e2e/KernelAndProgram/build-log.cpp +++ b/sycl/test-e2e/KernelAndProgram/build-log.cpp @@ -1,5 +1,5 @@ // for CUDA and HIP the failure happens at compile time, not during runtime -// UNSUPPORTED: cuda || hip +// UNSUPPORTED: target-nvidia || target-amd // TODO: rewrite this into a unit-test // RUN: %{build} -DGPU -o %t_gpu.out diff --git a/sycl/test-e2e/KernelAndProgram/cache-build-result.cpp b/sycl/test-e2e/KernelAndProgram/cache-build-result.cpp index 7ebf391a631b5..41a7ba7af3cd7 
100644 --- a/sycl/test-e2e/KernelAndProgram/cache-build-result.cpp +++ b/sycl/test-e2e/KernelAndProgram/cache-build-result.cpp @@ -1,5 +1,5 @@ // for CUDA and HIP the failure happens at compile time, not during runtime -// UNSUPPORTED: cuda || hip || ze_debug +// UNSUPPORTED: target-nvidia || target-amd || ze_debug // RUN: %{build} -DSYCL_DISABLE_FALLBACK_ASSERT=1 -o %t.out // RUN: %{build} -DSYCL_DISABLE_FALLBACK_ASSERT=1 -DGPU -o %t_gpu.out diff --git a/sycl/test-e2e/KernelAndProgram/level-zero-link-flags.cpp b/sycl/test-e2e/KernelAndProgram/level-zero-link-flags.cpp index c37ce90530192..2f233aaa731a7 100644 --- a/sycl/test-e2e/KernelAndProgram/level-zero-link-flags.cpp +++ b/sycl/test-e2e/KernelAndProgram/level-zero-link-flags.cpp @@ -1,4 +1,4 @@ -// REQUIRES: level_zero +// REQUIRES: target-spir, level_zero // RUN: %{build} -Xsycl-target-linker=spir64 -foo -o %t.out // RUN: %{run} %t.out //==--- level-zero-link-flags.cpp - Error handling for link flags --==// diff --git a/sycl/test-e2e/KernelAndProgram/undefined-symbol.cpp b/sycl/test-e2e/KernelAndProgram/undefined-symbol.cpp index 047f5bfbb970b..dc1bd16b06ce1 100644 --- a/sycl/test-e2e/KernelAndProgram/undefined-symbol.cpp +++ b/sycl/test-e2e/KernelAndProgram/undefined-symbol.cpp @@ -1,5 +1,5 @@ // for CUDA and HIP the failure happens at compile time, not during runtime -// UNSUPPORTED: cuda || hip +// UNSUPPORTED: target-nvidia || target-amd // RUN: %{build} -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/LLVMIntrinsicLowering/bitreverse.cpp b/sycl/test-e2e/LLVMIntrinsicLowering/bitreverse.cpp index e1d3a76c70b4d..d63cbbc17a778 100644 --- a/sycl/test-e2e/LLVMIntrinsicLowering/bitreverse.cpp +++ b/sycl/test-e2e/LLVMIntrinsicLowering/bitreverse.cpp @@ -1,6 +1,6 @@ // Test that llvm.bitreverse is lowered correctly by llvm-spirv. 
-// UNSUPPORTED: hip || cuda +// UNSUPPORTED: target-amd || target-nvidia // XFAIL: spirv-backend // XFAIL-TRACKER: https://github.com/intel/llvm/issues/16318, CMPLRLLVM-62187 diff --git a/sycl/test-e2e/LLVMIntrinsicLowering/sub_byte_bitreverse.cpp b/sycl/test-e2e/LLVMIntrinsicLowering/sub_byte_bitreverse.cpp index 976d30b6be49c..70ac1760bf9de 100644 --- a/sycl/test-e2e/LLVMIntrinsicLowering/sub_byte_bitreverse.cpp +++ b/sycl/test-e2e/LLVMIntrinsicLowering/sub_byte_bitreverse.cpp @@ -1,7 +1,7 @@ // Test that llvm.bitreverse is lowered correctly by llvm-spirv for 2/4-bit // types. -// UNSUPPORTED: hip || cuda +// UNSUPPORTED: target-amd || target-nvidia // XFAIL: gpu // XFAIL-TRACKER: https://github.com/intel/intel-graphics-compiler/issues/330 diff --git a/sycl/test-e2e/Regression/acos.cpp b/sycl/test-e2e/Regression/acos.cpp index 59cb130be0756..42018de69e26f 100644 --- a/sycl/test-e2e/Regression/acos.cpp +++ b/sycl/test-e2e/Regression/acos.cpp @@ -1,5 +1,5 @@ // REQUIRES: aspect-fp64 -// UNSUPPORTED: cuda || hip +// UNSUPPORTED: target-nvidia || target-amd // RUN: %{build} -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/SPVDumpUse/basic.cpp b/sycl/test-e2e/SPVDumpUse/basic.cpp index 1d009acaa0c7d..f4cf391066992 100644 --- a/sycl/test-e2e/SPVDumpUse/basic.cpp +++ b/sycl/test-e2e/SPVDumpUse/basic.cpp @@ -1,4 +1,4 @@ -// REQUIRES: opencl || level_zero +// REQUIRES: target-spir // // SYCL_USE_KERNEL_SPV assumes no dead arguments elimination, need to produce // SPV under the same conditions. diff --git a/sycl/test-e2e/SPVDumpUse/kernel_bundle.cpp b/sycl/test-e2e/SPVDumpUse/kernel_bundle.cpp index ea9402ef301f2..87caa04102df6 100644 --- a/sycl/test-e2e/SPVDumpUse/kernel_bundle.cpp +++ b/sycl/test-e2e/SPVDumpUse/kernel_bundle.cpp @@ -1,4 +1,4 @@ -// REQUIRES: opencl || level_zero +// REQUIRES: target-spir // // SYCL_USE_KERNEL_SPV assumes no dead arguments elimination, need to produce // SPV under the same conditions. 
diff --git a/sycl/test-e2e/SubGroup/attributes.cpp b/sycl/test-e2e/SubGroup/attributes.cpp index 118349321b4b0..e2f97292a1a23 100644 --- a/sycl/test-e2e/SubGroup/attributes.cpp +++ b/sycl/test-e2e/SubGroup/attributes.cpp @@ -1,6 +1,6 @@ // TODO: Despite using a supported required subgroup size compile_sub_group_size // reports as 0 on cuda and hip -// XFAIL: cuda || hip +// XFAIL: target-nvidia || target-amd // XFAIL-TRACKER: https://github.com/intel/llvm/issues/14357 // RUN: %{build} -fsycl-device-code-split=per_kernel -o %t.out diff --git a/sycl/test-e2e/VirtualFunctions/lit.local.cfg b/sycl/test-e2e/VirtualFunctions/lit.local.cfg index 3626f5dc6d960..e514c02f4d433 100644 --- a/sycl/test-e2e/VirtualFunctions/lit.local.cfg +++ b/sycl/test-e2e/VirtualFunctions/lit.local.cfg @@ -4,4 +4,4 @@ import os # paths like "../../../helper.hpp" in them, so let's just register a # substitution to add directory with helper headers into include search path config.substitutions.append(("%helper-includes", "-I {}".format(os.path.dirname(os.path.abspath(__file__))))) -config.required_features += ['aspect-ext_oneapi_virtual_functions'] +config.required_features += ['aspect-ext_oneapi_virtual_functions', 'target-spir'] From 04fe96c1d2f82e9b3130d30b6aba264926f6e9c2 Mon Sep 17 00:00:00 2001 From: Nick Sarnie Date: Tue, 11 Feb 2025 07:06:12 +0900 Subject: [PATCH 11/13] [CI] Fix driver update script and bump igc-dev (#16947) Script was failing because there are no Ubuntu 22.04 artifacts anymore. Bump the igc-dev version which fixes some tests.
--------- Signed-off-by: Sarnie, Nick --- devops/dependencies-igc-dev.json | 8 ++++---- devops/scripts/update_drivers.py | 2 +- sycl/test-e2e/ESIMD/accessor_local.cpp | 2 -- sycl/test-e2e/ESIMD/local_accessor_block_load_store.cpp | 2 -- sycl/test-e2e/ESIMD/local_accessor_copy_to_from.cpp | 2 -- .../ESIMD/lsc/lsc_local_accessor_block_load_store.cpp | 2 -- 6 files changed, 5 insertions(+), 13 deletions(-) diff --git a/devops/dependencies-igc-dev.json b/devops/dependencies-igc-dev.json index 37b9f160ec3f2..7160a02db6f71 100644 --- a/devops/dependencies-igc-dev.json +++ b/devops/dependencies-igc-dev.json @@ -1,10 +1,10 @@ { "linux": { "igc_dev": { - "github_tag": "igc-dev-61b96b3", - "version": "61b96b3", - "updated_at": "2025-01-15T17:43:30Z", - "url": "https://api.github.com/repos/intel/intel-graphics-compiler/actions/artifacts/2435370337/zip", + "github_tag": "igc-dev-4cc8dff", + "version": "4cc8dff", + "updated_at": "2025-02-10T10:27:30Z", + "url": "https://api.github.com/repos/intel/intel-graphics-compiler/actions/artifacts/2564401848/zip", "root": "{DEPS_ROOT}/opencl/runtime/linux/oclgpu" } } diff --git a/devops/scripts/update_drivers.py b/devops/scripts/update_drivers.py index c96d7dffed2d8..41ea70aeaf089 100644 --- a/devops/scripts/update_drivers.py +++ b/devops/scripts/update_drivers.py @@ -38,7 +38,7 @@ def uplift_linux_igfx_driver(config, platform_tag, igc_dev_only): config[platform_tag]["igc_dev"]["version"] = igcdevver config[platform_tag]["igc_dev"]["updated_at"] = igc_dev["updated_at"] config[platform_tag]["igc_dev"]["url"] = get_artifacts_download_url( - "intel/intel-graphics-compiler", "IGC_Ubuntu22.04_llvm14_clang-" + igcdevver + "intel/intel-graphics-compiler", "IGC_Ubuntu24.04_llvm14_clang-" + igcdevver ) return config diff --git a/sycl/test-e2e/ESIMD/accessor_local.cpp b/sycl/test-e2e/ESIMD/accessor_local.cpp index 21382f8ee401c..e566ade99b754 100644 --- a/sycl/test-e2e/ESIMD/accessor_local.cpp +++ b/sycl/test-e2e/ESIMD/accessor_local.cpp @@ -1,6 
+1,4 @@ // REQUIRES-INTEL-DRIVER: lin: 27202, win: 101.4677 -// XFAIL: igc-dev -// XFAIL-TRACKER: https://github.com/intel/llvm/issues/16388 // RUN: %{build} -o %t.out // RUN: %{run} %t.out // This test verifies usage of local_accessor methods operator[] diff --git a/sycl/test-e2e/ESIMD/local_accessor_block_load_store.cpp b/sycl/test-e2e/ESIMD/local_accessor_block_load_store.cpp index ce8b4d30d0928..9d7a79c8fe2ad 100644 --- a/sycl/test-e2e/ESIMD/local_accessor_block_load_store.cpp +++ b/sycl/test-e2e/ESIMD/local_accessor_block_load_store.cpp @@ -6,8 +6,6 @@ // //===----------------------------------------------------------------------===// // REQUIRES-INTEL-DRIVER: lin: 27202, win: 101.4677 -// XFAIL: igc-dev -// XFAIL-TRACKER: https://github.com/intel/llvm/issues/16388 // RUN: %{build} -o %t.out // RUN: %{run} %t.out // This test verifies usage of block_load/block_store for local_accessor. diff --git a/sycl/test-e2e/ESIMD/local_accessor_copy_to_from.cpp b/sycl/test-e2e/ESIMD/local_accessor_copy_to_from.cpp index c63411b286b32..664e7709a7e81 100644 --- a/sycl/test-e2e/ESIMD/local_accessor_copy_to_from.cpp +++ b/sycl/test-e2e/ESIMD/local_accessor_copy_to_from.cpp @@ -6,8 +6,6 @@ // //===----------------------------------------------------------------------===// // REQUIRES-INTEL-DRIVER: lin: 27202, win: 101.4677 -// XFAIL: igc-dev -// XFAIL-TRACKER: https://github.com/intel/llvm/issues/16388 // RUN: %{build} -o %t.out // RUN: %{run} %t.out // diff --git a/sycl/test-e2e/ESIMD/lsc/lsc_local_accessor_block_load_store.cpp b/sycl/test-e2e/ESIMD/lsc/lsc_local_accessor_block_load_store.cpp index 790b7dc2a92da..fd4a013ee200b 100644 --- a/sycl/test-e2e/ESIMD/lsc/lsc_local_accessor_block_load_store.cpp +++ b/sycl/test-e2e/ESIMD/lsc/lsc_local_accessor_block_load_store.cpp @@ -7,8 +7,6 @@ //===----------------------------------------------------------------------===// // REQUIRES: arch-intel_gpu_pvc || gpu-intel-dg2 // REQUIRES-INTEL-DRIVER: lin: 26690, win: 101.4576 -// XFAIL: 
igc-dev -// XFAIL-TRACKER: https://github.com/intel/llvm/issues/16388 // RUN: %{build} -o %t.out // RUN: %{run} %t.out // From 50f0552b37d7b60c0447fc1ea61830bc8d74a2fa Mon Sep 17 00:00:00 2001 From: Daniel Skrobot Date: Mon, 10 Feb 2025 23:07:30 +0100 Subject: [PATCH 12/13] [SYCL][E2E] Limit XFAIL only to CPU and Linux in VirtualFunctions/group-barrier test (#16940) The purpose of this PR is to re-enable the `VirtualFunctions/group-barrier` e2e test for CPU on Windows. --- sycl/test-e2e/VirtualFunctions/misc/group-barrier.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/test-e2e/VirtualFunctions/misc/group-barrier.cpp b/sycl/test-e2e/VirtualFunctions/misc/group-barrier.cpp index 48db619d94081..9383fbe7a1724 100644 --- a/sycl/test-e2e/VirtualFunctions/misc/group-barrier.cpp +++ b/sycl/test-e2e/VirtualFunctions/misc/group-barrier.cpp @@ -1,7 +1,7 @@ // REQUIRES: aspect-usm_shared_allocations // // On CPU it segfaults within the kernel that performs virtual function call. -// XFAIL: cpu +// XFAIL: cpu && opencl && linux // XFAIL-TRACKER: https://github.com/intel/llvm/issues/15080 // UNSUPPORTED: gpu // UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/15068 From 36ce10ed7a5bcd14fcd180cc245aaa1dbb68e22e Mon Sep 17 00:00:00 2001 From: aelovikov-intel Date: Mon, 10 Feb 2025 16:02:00 -0800 Subject: [PATCH 13/13] [NFCI][SYCL] Split vec's unary ops into individual mixins (#16946) We don't want constraints on non-template hidden friend operators to be implemented by templatizing it. It would be trivial with C++20 concepts, but we're limited to C++17. The proposed class hierarchy for `vector_arith` helper had intended that, but implementation was only partial. We want to fix that while implementing the proposed specification changes to the vec/swizzle, so set up the infrastructure to do so now, to minimize the amount of "preview-breaking-changes" customizations later.
The idea is to split each operator into an individual unrestrained mixin, so that the constraints could be implemented on top of it via `detail::ApplyIf` helper. This particular PR only implements such a change for unary operators. The rest will be implemented in a subsequent change to ease review process. --- sycl/include/sycl/detail/vector_arith.hpp | 152 ++++++++---------- sycl/include/sycl/vector.hpp | 1 - .../vector/vector_bf16_builtins.cpp | 24 +-- .../vector/vector_convert_bfloat.cpp | 14 +- .../vector/vector_math_ops.cpp | 68 ++++---- 5 files changed, 124 insertions(+), 135 deletions(-) diff --git a/sycl/include/sycl/detail/vector_arith.hpp b/sycl/include/sycl/detail/vector_arith.hpp index e628ebb1ae260..cdb8b04b11da6 100644 --- a/sycl/include/sycl/detail/vector_arith.hpp +++ b/sycl/include/sycl/detail/vector_arith.hpp @@ -59,11 +59,39 @@ struct UnaryPlus { } }; -struct VecOperators { +// Tag to map/templatize the mixin for prefix/postfix inc/dec operators. +struct IncDec {}; + +template struct IncDecImpl { + using element_type = typename from_incomplete::element_type; + using vec_t = simplify_if_swizzle_t>; + +public: + friend SelfOperandTy &operator++(SelfOperandTy &x) { + x += element_type{1}; + return x; + } + friend SelfOperandTy &operator--(SelfOperandTy &x) { + x -= element_type{1}; + return x; + } + friend auto operator++(SelfOperandTy &x, int) { + vec_t tmp{x}; + x += element_type{1}; + return tmp; + } + friend auto operator--(SelfOperandTy &x, int) { + vec_t tmp{x}; + x -= element_type{1}; + return tmp; + } +}; + +template struct VecOperators { + static_assert(is_vec_v); + template static constexpr auto apply(const ArgTys &...Args) { - using Self = nth_type_t<0, ArgTys...>; - static_assert(is_vec_v); static_assert(((std::is_same_v && ...))); using element_type = typename Self::element_type; @@ -163,6 +191,41 @@ struct VecOperators { res[i] = Op(Args[i]...); return res; } + + // Uglier than possible due to + // 
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=85282. + template struct OpMixin; + + template + struct OpMixin>> + : public IncDecImpl {}; + +#define __SYCL_VEC_UOP_MIXIN(OP, OPERATOR) \ + template \ + struct OpMixin>> { \ + friend auto operator OPERATOR(const Self &v) { return apply(v); } \ + }; + + __SYCL_VEC_UOP_MIXIN(std::negate, -) + __SYCL_VEC_UOP_MIXIN(std::logical_not, !) + __SYCL_VEC_UOP_MIXIN(UnaryPlus, +) + + template + struct OpMixin>>> { + template ::element_type> + friend std::enable_if_t, Self> operator~(const Self &v) { + return apply>(v); + } + }; + +#undef __SYCL_VEC_UOP_MIXIN + + template + struct __SYCL_EBO CombineImpl : public OpMixin... {}; + + struct Combined + : public CombineImpl, std::logical_not, + std::bit_not, UnaryPlus, IncDec> {}; }; // Macros to populate binary operation on sycl::vec. @@ -174,7 +237,7 @@ struct VecOperators { template \ friend std::enable_if_t<(COND), vec_t> operator BINOP(const vec_t & Lhs, \ const vec_t & Rhs) { \ - return VecOperators::apply(Lhs, Rhs); \ + return VecOperators::template apply(Lhs, Rhs); \ } \ \ template \ @@ -200,65 +263,11 @@ struct VecOperators { return Lhs; \ } -/**************************************************************** - * vec_arith_common - * / | \ - * / | \ - * vec_arith vec_arith ... vec_arith - * \ | / - * \ | / - * sycl::vec - * - * vec_arith_common is the base class for vec_arith. It contains - * the common math operators of sycl::vec for all types. - * vec_arith is the derived class that contains the math operators - * specialized for certain types. sycl::vec inherits from vec_arith. - * *************************************************************/ -template class vec_arith_common; -template struct vec_helper; - template -class vec_arith : public vec_arith_common { +class vec_arith : public VecOperators>::Combined { protected: using vec_t = vec; using ocl_t = detail::fixed_width_signed; - template using vec_data = vec_helper; - - // operator!. 
- friend vec operator!(const vec_t &Rhs) { - return VecOperators::apply>(Rhs); - } - - // operator +. - friend vec_t operator+(const vec_t &Lhs) { - return VecOperators::apply(Lhs); - } - - // operator -. - friend vec_t operator-(const vec_t &Lhs) { - return VecOperators::apply>(Lhs); - } - -// Unary operations on sycl::vec -// FIXME: Don't allow Unary operators on vec after -// https://github.com/KhronosGroup/SYCL-CTS/issues/896 gets fixed. -#ifdef __SYCL_UOP -#error "Undefine __SYCL_UOP macro" -#endif -#define __SYCL_UOP(UOP, OPASSIGN) \ - friend vec_t &operator UOP(vec_t & Rhs) { \ - Rhs OPASSIGN DataT{1}; \ - return Rhs; \ - } \ - friend vec_t operator UOP(vec_t &Lhs, int) { \ - vec_t Ret(Lhs); \ - Lhs OPASSIGN DataT{1}; \ - return Ret; \ - } - - __SYCL_UOP(++, +=) - __SYCL_UOP(--, -=) -#undef __SYCL_UOP // The logical operations on scalar types results in 0/1, while for vec<>, // logical operations should result in 0 and -1 (similar to OpenCL vectors). @@ -272,7 +281,7 @@ class vec_arith : public vec_arith_common { template \ friend std::enable_if_t<(COND), vec> operator RELLOGOP( \ const vec_t & Lhs, const vec_t & Rhs) { \ - return VecOperators::apply(Lhs, Rhs); \ + return VecOperators::template apply(Lhs, Rhs); \ } \ \ template \ @@ -325,13 +334,13 @@ class vec_arith : public vec_arith_common { #if (!defined(_HAS_STD_BYTE) || _HAS_STD_BYTE != 0) template class vec_arith - : public vec_arith_common { + : public VecOperators>::template OpMixin< + std::bit_not> { protected: // NumElements can never be zero. Still using the redundant check to avoid // incomplete type errors. using DataT = typename std::conditional_t; using vec_t = vec; - template using vec_data = vec_helper; // Special <<, >> operators for std::byte. 
// std::byte is not an arithmetic type and it only supports the following @@ -376,25 +385,6 @@ class vec_arith }; #endif // (!defined(_HAS_STD_BYTE) || _HAS_STD_BYTE != 0) -template class vec_arith_common { -protected: - using vec_t = vec; - - static constexpr bool IsBfloat16 = - std::is_same_v; - - // operator~() available only when: dataT != float && dataT != double - // && dataT != half - template - friend std::enable_if_t, vec_t> - operator~(const vec_t &Rhs) { - return VecOperators::apply>(Rhs); - } - - // friends - template friend class __SYCL_EBO vec; -}; - #undef __SYCL_BINOP } // namespace detail diff --git a/sycl/include/sycl/vector.hpp b/sycl/include/sycl/vector.hpp index 67d00bd9ea7de..1b626e8fc341e 100644 --- a/sycl/include/sycl/vector.hpp +++ b/sycl/include/sycl/vector.hpp @@ -435,7 +435,6 @@ class __SYCL_EBO vec template friend class __SYCL_EBO vec; // To allow arithmetic operators access private members of vec. template friend class detail::vec_arith; - template friend class detail::vec_arith_common; }; ///////////////////////// class sycl::vec ///////////////////////// diff --git a/sycl/test/check_device_code/vector/vector_bf16_builtins.cpp b/sycl/test/check_device_code/vector/vector_bf16_builtins.cpp index a8c916b5fe3c4..e73ac212fb5fd 100644 --- a/sycl/test/check_device_code/vector/vector_bf16_builtins.cpp +++ b/sycl/test/check_device_code/vector/vector_bf16_builtins.cpp @@ -69,7 +69,7 @@ SYCL_EXTERNAL auto TestFMin(vec a, vec b) { } // CHECK-LABEL: define dso_local spir_func void @_Z8TestFMaxN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi3EEES5_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.6") align 8 initializes((0, 8)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.6") align 8 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.6") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META24:![0-9]+]] !sycl_fixed_targets 
[[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.14") align 8 initializes((0, 8)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.14") align 8 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.14") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META24:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[VEC_ADDR_I_I_I_I12_I:%.*]] = alloca <3 x float>, align 16 // CHECK-NEXT: [[DST_I_I_I_I13_I:%.*]] = alloca [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], align 2 @@ -87,7 +87,7 @@ SYCL_EXTERNAL auto TestFMin(vec a, vec b) { // CHECK-NEXT: [[EXTRACTVEC_I_I_I_I_I:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <4 x i32> // CHECK-NEXT: store <4 x i16> [[EXTRACTVEC_I_I_I_I_I]], ptr [[VEC_ADDR_I_I_I_I_I]], align 8, !tbaa [[TBAA14]], !noalias [[META28]] // CHECK-NEXT: call spir_func void @__devicelib_ConvertBF16ToFINTELVec3(ptr addrspace(4) noundef [[VEC_ADDR_ASCAST_I_I_I_I_I]], ptr addrspace(4) noundef [[DST_ASCAST_I_I_I_I_I]]) #[[ATTR5]], !noalias [[META28]] -// CHECK-NEXT: [[LOADVEC4_I_I_I_I_I_I:%.*]] = load <4 x float>, ptr [[DST_I_I_I_I_I]], align 4, !noalias [[META28]] +// CHECK-NEXT: [[LOADVECN_I_I_I_I_I_I:%.*]] = load <4 x float>, ptr [[DST_I_I_I_I_I]], align 4, !noalias [[META28]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VEC_ADDR_I_I_I_I_I]]), !noalias [[META28]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[DST_I_I_I_I_I]]), !noalias [[META28]] // CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[VEC_ADDR_I_I_I_I2_I]]), !noalias [[META31:![0-9]+]] @@ -97,11 +97,11 @@ SYCL_EXTERNAL auto TestFMin(vec a, vec b) { // CHECK-NEXT: [[EXTRACTVEC_I_I_I_I7_I:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> poison, <4 x i32> // CHECK-NEXT: store <4 x i16> [[EXTRACTVEC_I_I_I_I7_I]], ptr [[VEC_ADDR_I_I_I_I2_I]], align 8, !tbaa 
[[TBAA14]], !noalias [[META31]] // CHECK-NEXT: call spir_func void @__devicelib_ConvertBF16ToFINTELVec3(ptr addrspace(4) noundef [[VEC_ADDR_ASCAST_I_I_I_I5_I]], ptr addrspace(4) noundef [[DST_ASCAST_I_I_I_I6_I]]) #[[ATTR5]], !noalias [[META31]] -// CHECK-NEXT: [[LOADVEC4_I_I_I_I_I8_I:%.*]] = load <4 x float>, ptr [[DST_I_I_I_I3_I]], align 4, !noalias [[META31]] +// CHECK-NEXT: [[LOADVECN_I_I_I_I_I8_I:%.*]] = load <4 x float>, ptr [[DST_I_I_I_I3_I]], align 4, !noalias [[META31]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VEC_ADDR_I_I_I_I2_I]]), !noalias [[META31]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[DST_I_I_I_I3_I]]), !noalias [[META31]] -// CHECK-NEXT: [[EXTRACTVEC_I_I_I_I:%.*]] = shufflevector <4 x float> [[LOADVEC4_I_I_I_I_I_I]], <4 x float> poison, <3 x i32> -// CHECK-NEXT: [[EXTRACTVEC_I_I4_I_I:%.*]] = shufflevector <4 x float> [[LOADVEC4_I_I_I_I_I8_I]], <4 x float> poison, <3 x i32> +// CHECK-NEXT: [[EXTRACTVEC_I_I_I_I:%.*]] = shufflevector <4 x float> [[LOADVECN_I_I_I_I_I_I]], <4 x float> poison, <3 x i32> +// CHECK-NEXT: [[EXTRACTVEC_I_I4_I_I:%.*]] = shufflevector <4 x float> [[LOADVECN_I_I_I_I_I8_I]], <4 x float> poison, <3 x i32> // CHECK-NEXT: [[CALL2_I_I:%.*]] = call spir_func noundef <3 x float> @_Z16__spirv_ocl_fmaxDv3_fS_(<3 x float> noundef [[EXTRACTVEC_I_I_I_I]], <3 x float> noundef [[EXTRACTVEC_I_I4_I_I]]) #[[ATTR6]] // CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META34:![0-9]+]]) // CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr nonnull [[VEC_ADDR_I_I_I_I12_I]]), !noalias [[META37:![0-9]+]] @@ -111,11 +111,11 @@ SYCL_EXTERNAL auto TestFMin(vec a, vec b) { // CHECK-NEXT: [[EXTRACTVEC_I_I_I_I17_I:%.*]] = shufflevector <3 x float> [[CALL2_I_I]], <3 x float> poison, <4 x i32> // CHECK-NEXT: store <4 x float> [[EXTRACTVEC_I_I_I_I17_I]], ptr [[VEC_ADDR_I_I_I_I12_I]], align 16, !tbaa [[TBAA14]], !noalias [[META37]] // CHECK-NEXT: call spir_func void 
@__devicelib_ConvertFToBF16INTELVec3(ptr addrspace(4) noundef [[VEC_ADDR_ASCAST_I_I_I_I15_I]], ptr addrspace(4) noundef [[DST_ASCAST_I_I_I_I16_I]]) #[[ATTR5]], !noalias [[META37]] -// CHECK-NEXT: [[LOADVEC4_I_I_I_I_I18_I:%.*]] = load <4 x i16>, ptr [[DST_I_I_I_I13_I]], align 2, !noalias [[META37]] +// CHECK-NEXT: [[LOADVECN_I_I_I_I_I18_I:%.*]] = load <4 x i16>, ptr [[DST_I_I_I_I13_I]], align 2, !noalias [[META37]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[VEC_ADDR_I_I_I_I12_I]]), !noalias [[META37]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[DST_I_I_I_I13_I]]), !noalias [[META37]] -// CHECK-NEXT: [[EXTRACTVEC4_I19_I:%.*]] = shufflevector <4 x i16> [[LOADVEC4_I_I_I_I_I18_I]], <4 x i16> poison, <4 x i32> -// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC4_I19_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, !alias.scope [[META37]] +// CHECK-NEXT: [[EXTRACTVEC_I19_I:%.*]] = shufflevector <4 x i16> [[LOADVECN_I_I_I_I_I18_I]], <4 x i16> poison, <4 x i32> +// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC_I19_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, !alias.scope [[META37]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestFMax(vec a, vec b) { @@ -123,7 +123,7 @@ SYCL_EXTERNAL auto TestFMax(vec a, vec b) { } // CHECK-LABEL: define dso_local spir_func void @_Z9TestIsNanN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi4EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.18") align 8 initializes((0, 8)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.24") align 8 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META38:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.34") align 8 initializes((0, 8)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.44") align 8 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] 
!srcloc [[META38:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[VEC_ADDR_I_I_I_I_I:%.*]] = alloca <4 x i16>, align 8 // CHECK-NEXT: [[DST_I_I_I_I_I:%.*]] = alloca [4 x float], align 4 @@ -149,7 +149,7 @@ SYCL_EXTERNAL auto TestIsNan(vec a) { } // CHECK-LABEL: define dso_local spir_func void @_Z8TestFabsN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi8EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.46") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.46") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META48:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.82") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.82") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META48:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[VEC_ADDR_I_I_I_I2_I:%.*]] = alloca <8 x float>, align 32 // CHECK-NEXT: [[DST_I_I_I_I3_I:%.*]] = alloca [8 x %"class.sycl::_V1::ext::oneapi::bfloat16"], align 2 @@ -185,7 +185,7 @@ SYCL_EXTERNAL auto TestFabs(vec a) { } // CHECK-LABEL: define dso_local spir_func void @_Z8TestCeilN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi8EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.46") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.46") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META59:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.82") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly 
byval(%"class.sycl::_V1::vec.82") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META59:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[VEC_ADDR_I_I_I_I2_I:%.*]] = alloca <8 x float>, align 32 // CHECK-NEXT: [[DST_I_I_I_I3_I:%.*]] = alloca [8 x %"class.sycl::_V1::ext::oneapi::bfloat16"], align 2 @@ -221,7 +221,7 @@ SYCL_EXTERNAL auto TestCeil(vec a) { } // CHECK-LABEL: define dso_local spir_func void @_Z7TestFMAN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi16EEES5_S5_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.58") align 32 initializes((0, 32)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.58") align 32 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.58") align 32 [[B:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.58") align 32 [[C:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META70:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.102") align 32 initializes((0, 32)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.102") align 32 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.102") align 32 [[B:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.102") align 32 [[C:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META70:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[VEC_ADDR_I_I_I_I14_I:%.*]] = alloca <16 x float>, align 64 // CHECK-NEXT: [[DST_I_I_I_I15_I:%.*]] = alloca [16 x %"class.sycl::_V1::ext::oneapi::bfloat16"], align 2 diff --git a/sycl/test/check_device_code/vector/vector_convert_bfloat.cpp b/sycl/test/check_device_code/vector/vector_convert_bfloat.cpp index 11fe56b0b54c3..7074116fa1b96 100644 --- a/sycl/test/check_device_code/vector/vector_convert_bfloat.cpp +++ 
b/sycl/test/check_device_code/vector/vector_convert_bfloat.cpp @@ -63,7 +63,7 @@ SYCL_EXTERNAL auto TestBFtoFDeviceRZ(vec &inp) { } // CHECK-LABEL: define dso_local spir_func void @_Z19TestBFtointDeviceRZRN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi3EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.6") align 16 [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 8 dereferenceable(8) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META18:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.14") align 16 [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 8 dereferenceable(8) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META18:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) // CHECK-NEXT: [[LOADVECN_I_I:%.*]] = load <4 x i16>, ptr addrspace(4) [[INP]], align 8, !noalias [[META19]] @@ -90,7 +90,7 @@ SYCL_EXTERNAL auto TestBFtointDeviceRZ(vec &inp) { } // CHECK-LABEL: define dso_local spir_func void @_Z20TestBFtointDeviceRNERN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi1EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.12") align 4 initializes((0, 4)) [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 2 dereferenceable(2) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META24:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.24") align 4 initializes((0, 4)) [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 2 dereferenceable(2) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META24:![0-9]+]] !sycl_fixed_targets [[META7]] { // 
CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META25:![0-9]+]]) // CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr addrspace(4) [[INP]], align 2, !tbaa [[TBAA11]], !noalias [[META25]] @@ -103,7 +103,7 @@ SYCL_EXTERNAL auto TestBFtointDeviceRNE(vec &inp) { } // CHECK-LABEL: define dso_local spir_func void @_Z18TestFtoBFDeviceRNERN4sycl3_V13vecIfLi3EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.0") align 8 initializes((0, 8)) [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 16 dereferenceable(16) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META28:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.4") align 8 initializes((0, 8)) [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 16 dereferenceable(16) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META28:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[VEC_ADDR_I_I_I_I:%.*]] = alloca <3 x float>, align 16 // CHECK-NEXT: [[DST_I_I_I_I:%.*]] = alloca [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], align 2 @@ -128,7 +128,7 @@ SYCL_EXTERNAL auto TestFtoBFDeviceRNE(vec &inp) { } // CHECK-LABEL: define dso_local spir_func void @_Z17TestFtoBFDeviceRZRN4sycl3_V13vecIfLi3EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.0") align 8 [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 16 dereferenceable(16) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META32:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.4") align 8 [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 16 dereferenceable(16) [[INP:%.*]]) 
local_unnamed_addr #[[ATTR0]] !srcloc [[META32:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META33:![0-9]+]]) // CHECK-NEXT: [[LOADVECN_I_I:%.*]] = load <4 x float>, ptr addrspace(4) [[INP]], align 16, !noalias [[META33]] @@ -155,7 +155,7 @@ SYCL_EXTERNAL auto TestFtoBFDeviceRZ(vec &inp) { } // CHECK-LABEL: define dso_local spir_func void @_Z19TestInttoBFDeviceRZRN4sycl3_V13vecIiLi3EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.0") align 8 [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 16 dereferenceable(16) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META37:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.4") align 8 [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 16 dereferenceable(16) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META37:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META38:![0-9]+]]) // CHECK-NEXT: [[LOADVECN_I_I:%.*]] = load <4 x i32>, ptr addrspace(4) [[INP]], align 16, !noalias [[META38]] @@ -182,7 +182,7 @@ SYCL_EXTERNAL auto TestInttoBFDeviceRZ(vec &inp) { } // CHECK-LABEL: define dso_local spir_func void @_Z19TestLLtoBFDeviceRTPRN4sycl3_V13vecIxLi1EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.18") align 2 initializes((0, 2)) [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 8 dereferenceable(8) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META42:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.34") align 2 
initializes((0, 2)) [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 8 dereferenceable(8) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META42:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META43:![0-9]+]]) // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr addrspace(4) [[INP]], align 8, !tbaa [[TBAA46:![0-9]+]], !noalias [[META43]] @@ -195,7 +195,7 @@ SYCL_EXTERNAL auto TestLLtoBFDeviceRTP(vec &inp) { } // CHECK-LABEL: define dso_local spir_func void @_Z22TestShorttoBFDeviceRTNRN4sycl3_V13vecIsLi2EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.32") align 4 [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 4 dereferenceable(4) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META48:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.56") align 4 [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 4 dereferenceable(4) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META48:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META49:![0-9]+]]) // CHECK-NEXT: [[TMP0:%.*]] = load <2 x i16>, ptr addrspace(4) [[INP]], align 4, !tbaa [[TBAA11]], !noalias [[META49]] diff --git a/sycl/test/check_device_code/vector/vector_math_ops.cpp b/sycl/test/check_device_code/vector/vector_math_ops.cpp index 2f24e0c1aa2a5..819337fea1d17 100644 --- a/sycl/test/check_device_code/vector/vector_math_ops.cpp +++ b/sycl/test/check_device_code/vector/vector_math_ops.cpp @@ -32,13 +32,13 @@ using namespace sycl; SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } // CHECK-LABEL: define dso_local spir_func void @_Z7TestAddN4sycl3_V13vecIfLi3EEES2_( -// CHECK-SAME: ptr 
addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.1") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.1") align 16 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.1") align 16 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META21:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.5") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.5") align 16 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.5") align 16 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META21:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META22:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META25:![0-9]+]]) -// CHECK-NEXT: [[LOADVEC4_I_I_I:%.*]] = load <4 x float>, ptr [[A]], align 16, !noalias [[META28:![0-9]+]] -// CHECK-NEXT: [[LOADVEC4_I6_I_I:%.*]] = load <4 x float>, ptr [[B]], align 16, !noalias [[META28]] -// CHECK-NEXT: [[TMP0:%.*]] = fadd <4 x float> [[LOADVEC4_I_I_I]], [[LOADVEC4_I6_I_I]] +// CHECK-NEXT: [[LOADVECN_I_I_I:%.*]] = load <4 x float>, ptr [[A]], align 16, !noalias [[META28:![0-9]+]] +// CHECK-NEXT: [[LOADVECN_I6_I_I:%.*]] = load <4 x float>, ptr [[B]], align 16, !noalias [[META28]] +// CHECK-NEXT: [[TMP0:%.*]] = fadd <4 x float> [[LOADVECN_I_I_I]], [[LOADVECN_I6_I_I]] // CHECK-NEXT: [[EXTRACTVEC_I_I:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <4 x i32> // CHECK-NEXT: store <4 x float> [[EXTRACTVEC_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, !alias.scope [[META28]] // CHECK-NEXT: ret void @@ -46,7 +46,7 @@ SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } SYCL_EXTERNAL auto TestAdd(vec a, 
vec b) { return a + b; } // CHECK-LABEL: define dso_local spir_func void @_Z7TestAddN4sycl3_V13vecIcLi16EEES2_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.9") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.9") align 16 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.9") align 16 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META29:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.17") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.17") align 16 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.17") align 16 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META29:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META30:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META33:![0-9]+]]) @@ -60,7 +60,7 @@ SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } // std::byte does not support '+'. Therefore, using bitwise XOR as a substitute. 
// CHECK-LABEL: define dso_local spir_func void @_Z7TestXorN4sycl3_V13vecISt4byteLi8EEES3_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.17") align 8 initializes((0, 8)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.17") align 8 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.17") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META37:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.29") align 8 initializes((0, 8)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.29") align 8 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.29") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META37:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META38:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META41:![0-9]+]]) @@ -75,7 +75,7 @@ SYCL_EXTERNAL auto TestXor(vec a, vec b) { } // CHECK-LABEL: define dso_local spir_func void @_Z7TestAddN4sycl3_V13vecIbLi4EEES2_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.25") align 4 initializes((0, 4)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.25") align 4 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.25") align 4 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META48:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.36") align 4 initializes((0, 4)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.36") align 4 [[A:%.*]], ptr nocapture noundef 
readonly byval(%"class.sycl::_V1::vec.36") align 4 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META48:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META49:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META52:![0-9]+]]) @@ -90,13 +90,13 @@ SYCL_EXTERNAL auto TestXor(vec a, vec b) { SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } // CHECK-LABEL: define dso_local spir_func void @_Z7TestAddN4sycl3_V13vecINS0_6detail9half_impl4halfELi3EEES5_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.33") align 8 initializes((0, 8)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.33") align 8 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.33") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META59:![0-9]+]] !sycl_used_aspects [[META60:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.48") align 8 initializes((0, 8)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.48") align 8 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.48") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META59:![0-9]+]] !sycl_used_aspects [[META60:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META62:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META65:![0-9]+]]) -// CHECK-NEXT: [[LOADVEC4_I_I_I:%.*]] = load <4 x half>, ptr [[A]], align 8, !noalias [[META68:![0-9]+]] -// CHECK-NEXT: [[LOADVEC4_I6_I_I:%.*]] = load <4 x half>, ptr [[B]], align 8, !noalias [[META68]] -// CHECK-NEXT: [[TMP0:%.*]] = fadd <4 x half> 
[[LOADVEC4_I_I_I]], [[LOADVEC4_I6_I_I]] +// CHECK-NEXT: [[LOADVECN_I_I_I:%.*]] = load <4 x half>, ptr [[A]], align 8, !noalias [[META68:![0-9]+]] +// CHECK-NEXT: [[LOADVECN_I6_I_I:%.*]] = load <4 x half>, ptr [[B]], align 8, !noalias [[META68]] +// CHECK-NEXT: [[TMP0:%.*]] = fadd <4 x half> [[LOADVECN_I_I_I]], [[LOADVECN_I6_I_I]] // CHECK-NEXT: [[EXTRACTVEC_I_I:%.*]] = shufflevector <4 x half> [[TMP0]], <4 x half> poison, <4 x i32> // CHECK-NEXT: store <4 x half> [[EXTRACTVEC_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, !alias.scope [[META69:![0-9]+]] // CHECK-NEXT: ret void @@ -104,10 +104,10 @@ SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } // CHECK-LABEL: define dso_local spir_func void @_Z7TestAddN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi3EEES5_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.41") align 8 [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.41") align 8 [[A:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.41") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] !srcloc [[META72:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.60") align 8 [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.60") align 8 [[A:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.60") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] !srcloc [[META72:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[REF_TMP_I_I_I_I:%.*]] = alloca float, align 4 -// CHECK-NEXT: [[RES_I_I:%.*]] = alloca %"class.sycl::_V1::vec.41", align 8 +// CHECK-NEXT: [[RES_I_I:%.*]] = alloca %"class.sycl::_V1::vec.60", align 8 // CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr [[A]] to ptr addrspace(4) // CHECK-NEXT: [[B_ASCAST:%.*]] = addrspacecast ptr [[B]] to ptr addrspace(4) // CHECK-NEXT: tail call 
void @llvm.experimental.noalias.scope.decl(metadata [[META73:![0-9]+]]) @@ -148,7 +148,7 @@ SYCL_EXTERNAL auto TestAdd(vec a, /***************** Binary Logical Ops *******************/ // CHECK-LABEL: define dso_local spir_func void @_Z15TestGreaterThanN4sycl3_V13vecIiLi16EEES2_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.47") align 64 initializes((0, 64)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.47") align 64 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.47") align 64 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META92:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.70") align 64 initializes((0, 64)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.70") align 64 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.70") align 64 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META92:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META93:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META96:![0-9]+]]) @@ -164,11 +164,11 @@ SYCL_EXTERNAL auto TestGreaterThan(vec a, vec b) { } // CHECK-LABEL: define dso_local spir_func noundef range(i8 -1, 1) <3 x i8> @_Z15TestGreaterThanN4sycl3_V13vecISt4byteLi3EEES3_( -// CHECK-SAME: ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.56") align 4 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.56") align 4 [[B:%.*]]) local_unnamed_addr #[[ATTR3:[0-9]+]] !srcloc [[META100:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.83") align 4 [[A:%.*]], ptr nocapture noundef readonly 
byval(%"class.sycl::_V1::vec.83") align 4 [[B:%.*]]) local_unnamed_addr #[[ATTR3:[0-9]+]] !srcloc [[META100:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[LOADVEC4_I_I:%.*]] = load <4 x i8>, ptr [[A]], align 1 -// CHECK-NEXT: [[LOADVEC4_I_I2:%.*]] = load <4 x i8>, ptr [[B]], align 1 -// CHECK-NEXT: [[TMP0:%.*]] = icmp ugt <4 x i8> [[LOADVEC4_I_I]], [[LOADVEC4_I_I2]] +// CHECK-NEXT: [[LOADVECN_I_I:%.*]] = load <4 x i8>, ptr [[A]], align 1 +// CHECK-NEXT: [[LOADVECN_I_I2:%.*]] = load <4 x i8>, ptr [[B]], align 1 +// CHECK-NEXT: [[TMP0:%.*]] = icmp ugt <4 x i8> [[LOADVECN_I_I]], [[LOADVECN_I_I2]] // CHECK-NEXT: [[CMP:%.*]] = shufflevector <4 x i1> [[TMP0]], <4 x i1> poison, <3 x i32> // CHECK-NEXT: [[SEXT:%.*]] = sext <3 x i1> [[CMP]] to <3 x i8> // CHECK-NEXT: ret <3 x i8> [[SEXT]] @@ -178,7 +178,7 @@ SYCL_EXTERNAL auto TestGreaterThan(vec a, vec b) { } // CHECK-LABEL: define dso_local spir_func void @_Z15TestGreaterThanN4sycl3_V13vecIbLi2EEES2_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.62") align 2 initializes((0, 2)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.68") align 2 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.68") align 2 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META101:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.88") align 2 initializes((0, 2)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.98") align 2 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.98") align 2 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META101:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META102:![0-9]+]]) // CHECK-NEXT: tail call void 
@llvm.experimental.noalias.scope.decl(metadata [[META105:![0-9]+]]) @@ -194,7 +194,7 @@ SYCL_EXTERNAL auto TestGreaterThan(vec a, vec b) { } // CHECK-LABEL: define dso_local spir_func void @_Z15TestGreaterThanN4sycl3_V13vecINS0_6detail9half_impl4halfELi8EEES5_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.78") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.84") align 16 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.84") align 16 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META112:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.112") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.122") align 16 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.122") align 16 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META112:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META113:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META116:![0-9]+]]) @@ -210,9 +210,9 @@ SYCL_EXTERNAL auto TestGreaterThan(vec a, vec b) { } // CHECK-LABEL: define dso_local spir_func void @_Z15TestGreaterThanN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi4EEES5_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.94") align 8 [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.100") align 8 [[A:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.100") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR1]] !srcloc [[META120:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly 
sret(%"class.sycl::_V1::vec.136") align 8 [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.146") align 8 [[A:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.146") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR1]] !srcloc [[META120:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RES_I_I:%.*]] = alloca %"class.sycl::_V1::vec.94", align 8 +// CHECK-NEXT: [[RES_I_I:%.*]] = alloca %"class.sycl::_V1::vec.136", align 8 // CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr [[A]] to ptr addrspace(4) // CHECK-NEXT: [[B_ASCAST:%.*]] = addrspacecast ptr [[B]] to ptr addrspace(4) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META121:![0-9]+]]) @@ -249,12 +249,12 @@ SYCL_EXTERNAL auto TestGreaterThan(vec a, /********************** Unary Ops **********************/ // CHECK-LABEL: define dso_local spir_func void @_Z12TestNegationN4sycl3_V13vecIiLi3EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.105") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.105") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META129:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.155") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.155") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META129:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META130:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META133:![0-9]+]]) -// CHECK-NEXT: [[LOADVEC4_I_I_I:%.*]] = load <4 x i32>, ptr [[A]], align 16, !noalias [[META136:![0-9]+]] -// CHECK-NEXT: [[EXTRACTVEC_I_I_I:%.*]] = shufflevector 
<4 x i32> [[LOADVEC4_I_I_I]], <4 x i32> poison, <3 x i32> +// CHECK-NEXT: [[LOADVECN_I_I_I:%.*]] = load <4 x i32>, ptr [[A]], align 16, !noalias [[META136:![0-9]+]] +// CHECK-NEXT: [[EXTRACTVEC_I_I_I:%.*]] = shufflevector <4 x i32> [[LOADVECN_I_I_I]], <4 x i32> poison, <3 x i32> // CHECK-NEXT: [[CMP_I_I_I_I:%.*]] = icmp eq <3 x i32> [[EXTRACTVEC_I_I_I]], zeroinitializer // CHECK-NEXT: [[SEXT_I_I_I_I:%.*]] = sext <3 x i1> [[CMP_I_I_I_I]] to <3 x i32> // CHECK-NEXT: [[EXTRACTVEC_I_I:%.*]] = shufflevector <3 x i32> [[SEXT_I_I_I_I]], <3 x i32> poison, <4 x i32> @@ -264,7 +264,7 @@ SYCL_EXTERNAL auto TestGreaterThan(vec a, SYCL_EXTERNAL auto TestNegation(vec a) { return !a; } // CHECK-LABEL: define dso_local spir_func void @_Z9TestMinusN4sycl3_V13vecIiLi4EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.112") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.112") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META137:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.166") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.166") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META137:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META138:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META141:![0-9]+]]) @@ -277,7 +277,7 @@ SYCL_EXTERNAL auto TestMinus(vec a) { return -a; } // Negation is not valid for std::byte. Therefore, using bitwise negation. 
// CHECK-LABEL: define dso_local spir_func void @_Z19TestBitwiseNegationN4sycl3_V13vecISt4byteLi16EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.118") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.118") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META145:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.176") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.176") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META145:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META146:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META149:![0-9]+]]) @@ -289,7 +289,7 @@ SYCL_EXTERNAL auto TestMinus(vec a) { return -a; } SYCL_EXTERNAL auto TestBitwiseNegation(vec a) { return ~a; } // CHECK-LABEL: define dso_local spir_func void @_Z12TestNegationN4sycl3_V13vecIbLi4EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.125") align 4 initializes((0, 4)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.25") align 4 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META153:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.182") align 4 initializes((0, 4)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.36") align 4 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META153:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void 
@llvm.experimental.noalias.scope.decl(metadata [[META154:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META157:![0-9]+]]) @@ -302,7 +302,7 @@ SYCL_EXTERNAL auto TestBitwiseNegation(vec a) { return ~a; } SYCL_EXTERNAL auto TestNegation(vec a) { return !a; } // CHECK-LABEL: define dso_local spir_func void @_Z12TestNegationN4sycl3_V13vecINS0_6detail9half_impl4halfELi2EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.132") align 4 initializes((0, 4)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.138") align 4 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META164:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.193") align 4 initializes((0, 4)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.203") align 4 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META164:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META165:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META168:![0-9]+]]) @@ -315,7 +315,7 @@ SYCL_EXTERNAL auto TestNegation(vec a) { return !a; } SYCL_EXTERNAL auto TestNegation(vec a) { return !a; } // CHECK-LABEL: define dso_local spir_func void @_Z9TestMinusN4sycl3_V13vecINS0_6detail9half_impl4halfELi8EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.84") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.84") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META175:![0-9]+]] !sycl_used_aspects [[META60]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture 
writable writeonly sret(%"class.sycl::_V1::vec.122") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.122") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META175:![0-9]+]] !sycl_used_aspects [[META60]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META176:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META179:![0-9]+]]) @@ -327,9 +327,9 @@ SYCL_EXTERNAL auto TestNegation(vec a) { return !a; } SYCL_EXTERNAL auto TestMinus(vec a) { return -a; } // CHECK-LABEL: define dso_local spir_func void @_Z12TestNegationN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi3EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.146") align 8 [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.41") align 8 [[A:%.*]]) local_unnamed_addr #[[ATTR1]] !srcloc [[META183:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.215") align 8 [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.60") align 8 [[A:%.*]]) local_unnamed_addr #[[ATTR1]] !srcloc [[META183:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RES_I_I:%.*]] = alloca %"class.sycl::_V1::vec.146", align 8 +// CHECK-NEXT: [[RES_I_I:%.*]] = alloca %"class.sycl::_V1::vec.215", align 8 // CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr [[A]] to ptr addrspace(4) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META184:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META187:![0-9]+]]) @@ -358,10 +358,10 @@ SYCL_EXTERNAL auto TestMinus(vec a) { return -a; } SYCL_EXTERNAL auto TestNegation(vec a) { return !a; } // CHECK-LABEL: define dso_local spir_func void 
@_Z9TestMinusN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi16EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.151") align 32 [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.151") align 32 [[A:%.*]]) local_unnamed_addr #[[ATTR1]] !srcloc [[META192:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.224") align 32 [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.224") align 32 [[A:%.*]]) local_unnamed_addr #[[ATTR1]] !srcloc [[META192:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[REF_TMP_I_I_I_I:%.*]] = alloca float, align 4 -// CHECK-NEXT: [[RES_I_I:%.*]] = alloca %"class.sycl::_V1::vec.151", align 32 +// CHECK-NEXT: [[RES_I_I:%.*]] = alloca %"class.sycl::_V1::vec.224", align 32 // CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr [[A]] to ptr addrspace(4) // CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr nonnull [[RES_I_I]]), !noalias [[META193:![0-9]+]] // CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 32 [[RES_I_I]], i8 0, i64 32, i1 false), !noalias [[META196:![0-9]+]]