diff --git a/src/utils/sort.hpp b/src/utils/sort.hpp index a4ab8139dab3..2076648f16c3 100644 --- a/src/utils/sort.hpp +++ b/src/utils/sort.hpp @@ -61,34 +61,13 @@ void sort(ParArray1D data, KeyComparator comparator, size_t min_idx, size_t max_idx) { PARTHENON_DEBUG_REQUIRE(min_idx < data.extent(0), "Invalid minimum sort index!"); PARTHENON_DEBUG_REQUIRE(max_idx < data.extent(0), "Invalid maximum sort index!"); -#if defined(KOKKOS_ENABLE_CUDA) -#ifdef __clang__ - PARTHENON_FAIL("sort is using thrust and there exists an incompatibility with clang, " - "see https://github.com/lanl/parthenon/issues/647 for more details. We " - "won't fix it because eventually the Parthenon sort should make use of " - "Kokkos::sort once a performant implementation is availabe. If you see " - "this message and need sort on CUDA devices with clang compiler please " - "get in touch by opening an issue on the Parthenon GitHub repo."); -#else +#if defined(KOKKOS_ENABLE_CUDA) && !defined(__clang__) thrust::device_ptr first_d = thrust::device_pointer_cast(data.data()) + min_idx; thrust::device_ptr last_d = thrust::device_pointer_cast(data.data()) + max_idx + 1; thrust::sort(first_d, last_d, comparator); -#endif -#elif defined(KOKKOS_ENABLE_HIP) - auto data_h = Kokkos::create_mirror_view_and_copy(HostMemSpace(), data); - std::sort(data_h.data() + min_idx, data_h.data() + max_idx + 1, comparator); - Kokkos::deep_copy(data, data_h); - // TODO(BRR) With Kokkos 4.4, switch to Kokkos::sort - // auto sub_data = Kokkos::subview(data, std::make_pair(min_idx, max_idx + 1)); - // Kokkos::sort(sub_data, comparator); #else - if (std::is_same::value) { - std::sort(data.data() + min_idx, data.data() + max_idx + 1, comparator); - } else { - PARTHENON_FAIL("sort is not supported outside of CPU or NVIDIA GPU. If you need sort " - "support on other devices, e.g., AMD or Intel GPUs, please get in " - "touch by opening an issue on the Parthenon GitHub."); - } + auto sub_data = Kokkos::subview(data, std::make_pair(min_idx, max_idx + 1)); + Kokkos::sort(sub_data, comparator); #endif // KOKKOS_ENABLE_CUDA } @@ -96,33 +75,13 @@ template void sort(ParArray1D data, size_t min_idx, size_t max_idx) { PARTHENON_DEBUG_REQUIRE(min_idx < data.extent(0), "Invalid minimum sort index!"); PARTHENON_DEBUG_REQUIRE(max_idx < data.extent(0), "Invalid maximum sort index!"); -#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) -#ifdef __clang__ - PARTHENON_FAIL("sort is using thrust and there exists an incompatibility with clang, " - "see https://github.com/lanl/parthenon/issues/647 for more details. We " - "won't fix it because eventually the Parthenon sort should make use of " - "Kokkos::sort once a performant implementation is availabe. If you see " - "this message and need sort on CUDA devices with clang compiler please " - "get in touch by opening an issue on the Parthenon GitHub repo."); -#else +#if defined(KOKKOS_ENABLE_CUDA) && !defined(__clang__) thrust::device_ptr first_d = thrust::device_pointer_cast(data.data()) + min_idx; thrust::device_ptr last_d = thrust::device_pointer_cast(data.data()) + max_idx + 1; thrust::sort(first_d, last_d); -#endif - auto data_h = Kokkos::create_mirror_view_and_copy(HostMemSpace(), data); - std::sort(data_h.data() + min_idx, data_h.data() + max_idx + 1); - Kokkos::deep_copy(data, data_h); - // TODO(BRR) With Kokkos 4.4, switch to Kokkos::sort - // auto sub_data = Kokkos::subview(data, std::make_pair(min_idx, max_idx + 1)); - // Kokkos::sort(sub_data); #else - if (std::is_same::value) { - std::sort(data.data() + min_idx, data.data() + max_idx + 1); - } else { - PARTHENON_FAIL("sort is not supported outside of CPU or NVIDIA GPU. If you need sort " - "support on other devices, e.g., AMD or Intel GPUs, please get in " - "touch by opening an issue on the Parthenon GitHub."); - } + auto sub_data = Kokkos::subview(data, std::make_pair(min_idx, max_idx + 1)); + Kokkos::sort(sub_data); #endif // KOKKOS_ENABLE_CUDA }