From 8dea79748652ecb427ac749968de6ce4191ff387 Mon Sep 17 00:00:00 2001 From: Chong Gao Date: Mon, 20 Jan 2025 19:32:28 +0800 Subject: [PATCH 1/9] Update how to management host UDF instance Signed-off-by: Chong Gao --- .../cudf/detail/aggregation/aggregation.hpp | 2 +- cpp/src/groupby/sort/host_udf_aggregation.cpp | 18 +++++++++--------- .../main/java/ai/rapids/cudf/Aggregation.java | 4 +++- .../java/ai/rapids/cudf/HostUDFWrapper.java | 11 ++++++++--- java/src/main/native/src/AggregationJni.cpp | 2 +- 5 files changed, 22 insertions(+), 15 deletions(-) diff --git a/cpp/include/cudf/detail/aggregation/aggregation.hpp b/cpp/include/cudf/detail/aggregation/aggregation.hpp index 5574ed6ea6e..4ef65e9d6ca 100644 --- a/cpp/include/cudf/detail/aggregation/aggregation.hpp +++ b/cpp/include/cudf/detail/aggregation/aggregation.hpp @@ -978,7 +978,7 @@ class host_udf_aggregation final : public groupby_aggregation, // Need to define the constructor and destructor in a separate source file where we have the // complete declaration of `host_udf_base`. - explicit host_udf_aggregation(std::unique_ptr udf_ptr_); + explicit host_udf_aggregation(host_udf_base* raw_udf_ptr_); ~host_udf_aggregation() override; [[nodiscard]] bool is_equal(aggregation const& _other) const override; diff --git a/cpp/src/groupby/sort/host_udf_aggregation.cpp b/cpp/src/groupby/sort/host_udf_aggregation.cpp index 6f1fe80c4bd..09b3bc934e0 100644 --- a/cpp/src/groupby/sort/host_udf_aggregation.cpp +++ b/cpp/src/groupby/sort/host_udf_aggregation.cpp @@ -21,10 +21,10 @@ namespace cudf { namespace detail { -host_udf_aggregation::host_udf_aggregation(std::unique_ptr udf_ptr_) - : aggregation{HOST_UDF}, udf_ptr{std::move(udf_ptr_)} +host_udf_aggregation::host_udf_aggregation(std::unique_ptr raw_udf_ptr_) + : aggregation{HOST_UDF}, udf_ptr(raw_udf_ptr_) { - CUDF_EXPECTS(udf_ptr != nullptr, "Invalid host_udf_base instance."); + CUDF_EXPECTS(raw_udf_ptr_ != nullptr, "Invalid host_udf_base instance."); } host_udf_aggregation::~host_udf_aggregation() = default; @@ -49,17 +49,17 @@ std::unique_ptr host_udf_aggregation::clone() const } // namespace detail template -std::unique_ptr make_host_udf_aggregation(std::unique_ptr udf_ptr_) +std::unique_ptr make_host_udf_aggregation(host_udf_base* raw_udf_ptr_) { - return std::make_unique(std::move(udf_ptr_)); + return std::make_unique(raw_udf_ptr_); } template CUDF_EXPORT std::unique_ptr make_host_udf_aggregation( - std::unique_ptr); + host_udf_base*); template CUDF_EXPORT std::unique_ptr - make_host_udf_aggregation(std::unique_ptr); +make_host_udf_aggregation(host_udf_base*); template CUDF_EXPORT std::unique_ptr - make_host_udf_aggregation(std::unique_ptr); +make_host_udf_aggregation(host_udf_base*); template CUDF_EXPORT std::unique_ptr - make_host_udf_aggregation(std::unique_ptr); +make_host_udf_aggregation(host_udf_base*); } // namespace cudf diff --git a/java/src/main/java/ai/rapids/cudf/Aggregation.java b/java/src/main/java/ai/rapids/cudf/Aggregation.java index c07a58ed8a5..36d05fd8576 100644 --- a/java/src/main/java/ai/rapids/cudf/Aggregation.java +++ b/java/src/main/java/ai/rapids/cudf/Aggregation.java @@ -396,7 +396,9 @@ private HostUDFAggregation(HostUDFWrapper wrapper) { @Override long createNativeInstance() { - return Aggregation.createHostUDFAgg(wrapper.udfNativeHandle); + // The created host Agg instance owns an unique ptr which holds the the UDF instance. + // When a host Agg instance is released, the UDF instance is also released. + return Aggregation.createHostUDFAgg(wrapper.createUDFInstance()); } @Override diff --git a/java/src/main/java/ai/rapids/cudf/HostUDFWrapper.java b/java/src/main/java/ai/rapids/cudf/HostUDFWrapper.java index 124f2c99188..bb53b02c1ea 100644 --- a/java/src/main/java/ai/rapids/cudf/HostUDFWrapper.java +++ b/java/src/main/java/ai/rapids/cudf/HostUDFWrapper.java @@ -30,7 +30,12 @@ public abstract class HostUDFWrapper implements AutoCloseable { public final long udfNativeHandle; - public HostUDFWrapper(long udfNativeHandle) { - this.udfNativeHandle = udfNativeHandle; - } + /** + * Call into JNI and create a derived UDF C++ instance. + * Note: This function should only be called in HostUDFAggregation.createNativeInstance, + * Then the instance created by `HostUDFAggregation.createNativeInstance` owns this UDFinstance. + * The instance created by `HostUDFAggregation.createNativeInstance` is managed by framework. In + * this way, we can simplify the resource management. + */ + abstract long createUDFInstance(); } diff --git a/java/src/main/native/src/AggregationJni.cpp b/java/src/main/native/src/AggregationJni.cpp index dd41c677761..ac8688a2c80 100644 --- a/java/src/main/native/src/AggregationJni.cpp +++ b/java/src/main/native/src/AggregationJni.cpp @@ -308,7 +308,7 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Aggregation_createHostUDFAgg(JNIEnv* try { cudf::jni::auto_set_device(env); auto const udf_ptr = reinterpret_cast(udf_native_handle); - auto output = cudf::make_host_udf_aggregation(udf_ptr->clone()); + auto output = cudf::make_host_udf_aggregation(udf_ptr); return reinterpret_cast(output.release()); } CATCH_STD(env, 0); From 7e59794665da3b0d17a71cb2166bd1d88c623bf4 Mon Sep 17 00:00:00 2001 From: Chong Gao Date: Mon, 20 Jan 2025 20:54:49 +0800 Subject: [PATCH 2/9] Fix compile error --- cpp/src/groupby/sort/host_udf_aggregation.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/groupby/sort/host_udf_aggregation.cpp b/cpp/src/groupby/sort/host_udf_aggregation.cpp index 09b3bc934e0..3401fe523cd 100644 --- a/cpp/src/groupby/sort/host_udf_aggregation.cpp +++ b/cpp/src/groupby/sort/host_udf_aggregation.cpp @@ -21,7 +21,7 @@ namespace cudf { namespace detail { -host_udf_aggregation::host_udf_aggregation(std::unique_ptr raw_udf_ptr_) +host_udf_aggregation::host_udf_aggregation(host_udf_base* raw_udf_ptr_) : aggregation{HOST_UDF}, udf_ptr(raw_udf_ptr_) { CUDF_EXPECTS(raw_udf_ptr_ != nullptr, "Invalid host_udf_base instance."); @@ -43,7 +43,7 @@ size_t host_udf_aggregation::do_hash() const std::unique_ptr host_udf_aggregation::clone() const { - return std::make_unique(udf_ptr->clone()); + return std::make_unique(udf_ptr->clone().get()); } } // namespace detail From dc5b518c5c5a9a0827d0681ee191f1d7871f30de Mon Sep 17 00:00:00 2001 From: Chong Gao Date: Tue, 21 Jan 2025 11:13:30 +0800 Subject: [PATCH 3/9] Fix UDF test cases --- cpp/tests/groupby/host_udf_example_tests.cu | 8 ++++---- cpp/tests/reductions/host_udf_example_tests.cu | 14 +++++++------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/cpp/tests/groupby/host_udf_example_tests.cu b/cpp/tests/groupby/host_udf_example_tests.cu index e1ded37d8a7..903b362406c 100644 --- a/cpp/tests/groupby/host_udf_example_tests.cu +++ b/cpp/tests/groupby/host_udf_example_tests.cu @@ -175,8 +175,8 @@ TEST_F(HostUDFGroupbyExampleTest, SimpleInput) auto const keys = int32s_col{0, 1, 2, 0, 1, 2, 0, 1, 2, 0}; auto const vals = doubles_col{{0.0, null, 2.0, 3.0, null, 5.0, null, null, 8.0, 9.0}, {true, false, true, true, false, true, false, false, true, true}}; - auto agg = cudf::make_host_udf_aggregation( - std::make_unique()); + auto agg = + cudf::make_host_udf_aggregation(new host_udf_groupby_example()); std::vector requests; requests.emplace_back(); @@ -202,8 +202,8 @@ TEST_F(HostUDFGroupbyExampleTest, EmptyInput) { auto const keys = int32s_col{}; auto const vals = doubles_col{}; - auto agg = cudf::make_host_udf_aggregation( - std::make_unique()); + auto agg = + cudf::make_host_udf_aggregation < cudf::groupby_aggregation >> (new host_udf_groupby_example()); std::vector requests; requests.emplace_back(); diff --git a/cpp/tests/reductions/host_udf_example_tests.cu b/cpp/tests/reductions/host_udf_example_tests.cu index 67b88c5306b..d4a9fece297 100644 --- a/cpp/tests/reductions/host_udf_example_tests.cu +++ b/cpp/tests/reductions/host_udf_example_tests.cu @@ -140,8 +140,8 @@ struct HostUDFReductionExampleTest : cudf::test::BaseFixture {}; TEST_F(HostUDFReductionExampleTest, SimpleInput) { auto const vals = doubles_col{0.0, 1.0, 2.0, 3.0, 4.0, 5.0}; - auto const agg = cudf::make_host_udf_aggregation( - std::make_unique()); + auto const agg = + cudf::make_host_udf_aggregation(new host_udf_reduction_example()); auto const reduced = cudf::reduce(vals, *agg, cudf::data_type{cudf::type_id::INT64}, @@ -158,8 +158,8 @@ TEST_F(HostUDFReductionExampleTest, SimpleInput) TEST_F(HostUDFReductionExampleTest, EmptyInput) { auto const vals = doubles_col{}; - auto const agg = cudf::make_host_udf_aggregation( - std::make_unique()); + auto const agg = + cudf::make_host_udf_aggregation(new host_udf_reduction_example()); auto const reduced = cudf::reduce(vals, *agg, cudf::data_type{cudf::type_id::INT64}, @@ -324,7 +324,7 @@ TEST_F(HostUDFSegmentedReductionExampleTest, SimpleInput) {true, false, true, true, false, true, false, false, true, true}}; auto const offsets = int32s_col{0, 3, 5, 10}.release(); auto const agg = cudf::make_host_udf_aggregation( - std::make_unique()); + new host_udf_segmented_reduction_example()); // Test without init value. { @@ -388,7 +388,7 @@ TEST_F(HostUDFSegmentedReductionExampleTest, EmptySegments) auto const vals = doubles_col{}; auto const offsets = int32s_col{0, 0, 0, 0}.release(); auto const agg = cudf::make_host_udf_aggregation( - std::make_unique()); + new host_udf_segmented_reduction_example()); auto const result = cudf::segmented_reduce( vals, cudf::device_span(offsets->view().begin(), offsets->size()), @@ -407,7 +407,7 @@ TEST_F(HostUDFSegmentedReductionExampleTest, EmptyInput) auto const vals = doubles_col{}; auto const offsets = int32s_col{}.release(); auto const agg = cudf::make_host_udf_aggregation( - std::make_unique()); + new host_udf_segmented_reduction_example()); auto const result = cudf::segmented_reduce( vals, cudf::device_span(offsets->view().begin(), offsets->size()), From 17211d13fa645ae035c002e0b5ea863fa3cb7bd3 Mon Sep 17 00:00:00 2001 From: Chong Gao Date: Tue, 21 Jan 2025 14:09:51 +0800 Subject: [PATCH 4/9] Fix compile errors --- cpp/include/cudf/aggregation.hpp | 2 +- cpp/tests/groupby/host_udf_example_tests.cu | 2 +- cpp/tests/groupby/host_udf_tests.cpp | 4 ++-- .../main/java/ai/rapids/cudf/Aggregation.java | 4 ++-- .../java/ai/rapids/cudf/HostUDFWrapper.java | 19 ++++++++++--------- java/src/main/native/src/AggregationJni.cpp | 4 ++-- 6 files changed, 18 insertions(+), 17 deletions(-) diff --git a/cpp/include/cudf/aggregation.hpp b/cpp/include/cudf/aggregation.hpp index 2b2a660bed7..2b035461a23 100644 --- a/cpp/include/cudf/aggregation.hpp +++ b/cpp/include/cudf/aggregation.hpp @@ -610,7 +610,7 @@ class host_udf_base; * @return A HOST_UDF aggregation object */ template -std::unique_ptr make_host_udf_aggregation(std::unique_ptr host_udf); +std::unique_ptr make_host_udf_aggregation(host_udf_base* host_udf); /** * @brief Factory to create a MERGE_LISTS aggregation. diff --git a/cpp/tests/groupby/host_udf_example_tests.cu b/cpp/tests/groupby/host_udf_example_tests.cu index 903b362406c..a0f7e4c85ba 100644 --- a/cpp/tests/groupby/host_udf_example_tests.cu +++ b/cpp/tests/groupby/host_udf_example_tests.cu @@ -203,7 +203,7 @@ TEST_F(HostUDFGroupbyExampleTest, EmptyInput) auto const keys = int32s_col{}; auto const vals = doubles_col{}; auto agg = - cudf::make_host_udf_aggregation < cudf::groupby_aggregation >> (new host_udf_groupby_example()); + cudf::make_host_udf_aggregation(new host_udf_groupby_example()); std::vector requests; requests.emplace_back(); diff --git a/cpp/tests/groupby/host_udf_tests.cpp b/cpp/tests/groupby/host_udf_tests.cpp index 17da28cdefc..754f03835a5 100644 --- a/cpp/tests/groupby/host_udf_tests.cpp +++ b/cpp/tests/groupby/host_udf_tests.cpp @@ -140,7 +140,7 @@ TEST_F(HostUDFTest, GroupbyBuiltinInput) auto const keys = int32s_col{0, 1, 2}; auto const vals = int32s_col{0, 1, 2}; auto agg = cudf::make_host_udf_aggregation( - std::make_unique(__LINE__, &test_run, /*test_other_agg*/ false)); + new host_udf_groupby_test(__LINE__, &test_run, /*test_other_agg*/ false)); std::vector requests; requests.emplace_back(); @@ -163,7 +163,7 @@ TEST_F(HostUDFTest, GroupbyWithCallingOtherAggregations) for (int i = 0; i < NUM_RANDOM_TESTS; ++i) { bool test_run = false; auto agg = cudf::make_host_udf_aggregation( - std::make_unique(__LINE__, &test_run, /*test_other_agg*/ true)); + new host_udf_groupby_test(__LINE__, &test_run, /*test_other_agg*/ true)); std::vector requests; requests.emplace_back(); diff --git a/java/src/main/java/ai/rapids/cudf/Aggregation.java b/java/src/main/java/ai/rapids/cudf/Aggregation.java index 36d05fd8576..66841d9b4ec 100644 --- a/java/src/main/java/ai/rapids/cudf/Aggregation.java +++ b/java/src/main/java/ai/rapids/cudf/Aggregation.java @@ -396,8 +396,8 @@ private HostUDFAggregation(HostUDFWrapper wrapper) { @Override long createNativeInstance() { - // The created host Agg instance owns an unique ptr which holds the the UDF instance. - // When a host Agg instance is released, the UDF instance is also released. + // The created host Agg instance takes the ownership of the passed in UDF instance. + // When a host Agg instance is released, the UDF instance in it is also released. return Aggregation.createHostUDFAgg(wrapper.createUDFInstance()); } diff --git a/java/src/main/java/ai/rapids/cudf/HostUDFWrapper.java b/java/src/main/java/ai/rapids/cudf/HostUDFWrapper.java index bb53b02c1ea..67c0d829a91 100644 --- a/java/src/main/java/ai/rapids/cudf/HostUDFWrapper.java +++ b/java/src/main/java/ai/rapids/cudf/HostUDFWrapper.java @@ -24,18 +24,19 @@ *

* A new host UDF aggregation implementation must extend this class and override the * {@code hashCode} and {@code equals} methods for such purposes. - * In addition, since this class implements {@code AutoCloseable}, the {@code close} method must - * also be overridden to automatically delete the native UDF instance upon class destruction. + * */ -public abstract class HostUDFWrapper implements AutoCloseable { - public final long udfNativeHandle; +public abstract class HostUDFWrapper { /** - * Call into JNI and create a derived UDF C++ instance. - * Note: This function should only be called in HostUDFAggregation.createNativeInstance, - * Then the instance created by `HostUDFAggregation.createNativeInstance` owns this UDFinstance. - * The instance created by `HostUDFAggregation.createNativeInstance` is managed by framework. In - * this way, we can simplify the resource management. + * Call into native code and create a derived host UDF instance. + * Note: This function MUST only be called in `HostUDFAggregation.createNativeInstance`, + * Then the aggregation instance created by `HostUDFAggregation.createNativeInstance` owns this UDF + * instance. This host UDF instance will be deleted when the aggregation instance is deleted. The + * aggregation instance is responsible for the lifetime of the host UDF instance. The lifetime of + * the aggregation instance is handle by the framework, e.g.: in the finally block of + * Table.aggregate, it calls `Aggregation.close(aggOperationInstances)` + * */ abstract long createUDFInstance(); } diff --git a/java/src/main/native/src/AggregationJni.cpp b/java/src/main/native/src/AggregationJni.cpp index ac8688a2c80..b844e5fa69f 100644 --- a/java/src/main/native/src/AggregationJni.cpp +++ b/java/src/main/native/src/AggregationJni.cpp @@ -307,8 +307,8 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Aggregation_createHostUDFAgg(JNIEnv* JNI_NULL_CHECK(env, udf_native_handle, "udf_native_handle is null", 0); try { cudf::jni::auto_set_device(env); - auto const udf_ptr = reinterpret_cast(udf_native_handle); - auto output = cudf::make_host_udf_aggregation(udf_ptr); + auto udf_ptr = reinterpret_cast(udf_native_handle); + auto output = cudf::make_host_udf_aggregation(udf_ptr); return reinterpret_cast(output.release()); } CATCH_STD(env, 0); From ae9fa515f925976220dc1cceacf72c34d148763b Mon Sep 17 00:00:00 2001 From: Chong Gao Date: Wed, 22 Jan 2025 07:49:16 +0800 Subject: [PATCH 5/9] Revert unsecure code --- cpp/include/cudf/aggregation.hpp | 2 +- .../cudf/detail/aggregation/aggregation.hpp | 2 +- cpp/src/groupby/sort/host_udf_aggregation.cpp | 20 +++++++++---------- cpp/tests/groupby/host_udf_example_tests.cu | 8 ++++---- cpp/tests/groupby/host_udf_tests.cpp | 4 ++-- .../reductions/host_udf_example_tests.cu | 14 ++++++------- java/src/main/native/src/AggregationJni.cpp | 4 ++-- 7 files changed, 27 insertions(+), 27 deletions(-) diff --git a/cpp/include/cudf/aggregation.hpp b/cpp/include/cudf/aggregation.hpp index 2b035461a23..2b2a660bed7 100644 --- a/cpp/include/cudf/aggregation.hpp +++ b/cpp/include/cudf/aggregation.hpp @@ -610,7 +610,7 @@ class host_udf_base; * @return A HOST_UDF aggregation object */ template -std::unique_ptr make_host_udf_aggregation(host_udf_base* host_udf); +std::unique_ptr make_host_udf_aggregation(std::unique_ptr host_udf); /** * @brief Factory to create a MERGE_LISTS aggregation. diff --git a/cpp/include/cudf/detail/aggregation/aggregation.hpp b/cpp/include/cudf/detail/aggregation/aggregation.hpp index 4ef65e9d6ca..5574ed6ea6e 100644 --- a/cpp/include/cudf/detail/aggregation/aggregation.hpp +++ b/cpp/include/cudf/detail/aggregation/aggregation.hpp @@ -978,7 +978,7 @@ class host_udf_aggregation final : public groupby_aggregation, // Need to define the constructor and destructor in a separate source file where we have the // complete declaration of `host_udf_base`. - explicit host_udf_aggregation(host_udf_base* raw_udf_ptr_); + explicit host_udf_aggregation(std::unique_ptr udf_ptr_); ~host_udf_aggregation() override; [[nodiscard]] bool is_equal(aggregation const& _other) const override; diff --git a/cpp/src/groupby/sort/host_udf_aggregation.cpp b/cpp/src/groupby/sort/host_udf_aggregation.cpp index 3401fe523cd..6f1fe80c4bd 100644 --- a/cpp/src/groupby/sort/host_udf_aggregation.cpp +++ b/cpp/src/groupby/sort/host_udf_aggregation.cpp @@ -21,10 +21,10 @@ namespace cudf { namespace detail { -host_udf_aggregation::host_udf_aggregation(host_udf_base* raw_udf_ptr_) - : aggregation{HOST_UDF}, udf_ptr(raw_udf_ptr_) +host_udf_aggregation::host_udf_aggregation(std::unique_ptr udf_ptr_) + : aggregation{HOST_UDF}, udf_ptr{std::move(udf_ptr_)} { - CUDF_EXPECTS(raw_udf_ptr_ != nullptr, "Invalid host_udf_base instance."); + CUDF_EXPECTS(udf_ptr != nullptr, "Invalid host_udf_base instance."); } host_udf_aggregation::~host_udf_aggregation() = default; @@ -43,23 +43,23 @@ size_t host_udf_aggregation::do_hash() const std::unique_ptr host_udf_aggregation::clone() const { - return std::make_unique(udf_ptr->clone().get()); + return std::make_unique(udf_ptr->clone()); } } // namespace detail template -std::unique_ptr make_host_udf_aggregation(host_udf_base* raw_udf_ptr_) +std::unique_ptr make_host_udf_aggregation(std::unique_ptr udf_ptr_) { - return std::make_unique(raw_udf_ptr_); + return std::make_unique(std::move(udf_ptr_)); } template CUDF_EXPORT std::unique_ptr make_host_udf_aggregation( - host_udf_base*); + std::unique_ptr); template CUDF_EXPORT std::unique_ptr -make_host_udf_aggregation(host_udf_base*); + make_host_udf_aggregation(std::unique_ptr); template CUDF_EXPORT std::unique_ptr -make_host_udf_aggregation(host_udf_base*); + make_host_udf_aggregation(std::unique_ptr); template CUDF_EXPORT std::unique_ptr -make_host_udf_aggregation(host_udf_base*); + make_host_udf_aggregation(std::unique_ptr); } // namespace cudf diff --git a/cpp/tests/groupby/host_udf_example_tests.cu b/cpp/tests/groupby/host_udf_example_tests.cu index a0f7e4c85ba..e1ded37d8a7 100644 --- a/cpp/tests/groupby/host_udf_example_tests.cu +++ b/cpp/tests/groupby/host_udf_example_tests.cu @@ -175,8 +175,8 @@ TEST_F(HostUDFGroupbyExampleTest, SimpleInput) auto const keys = int32s_col{0, 1, 2, 0, 1, 2, 0, 1, 2, 0}; auto const vals = doubles_col{{0.0, null, 2.0, 3.0, null, 5.0, null, null, 8.0, 9.0}, {true, false, true, true, false, true, false, false, true, true}}; - auto agg = - cudf::make_host_udf_aggregation(new host_udf_groupby_example()); + auto agg = cudf::make_host_udf_aggregation( + std::make_unique()); std::vector requests; requests.emplace_back(); @@ -202,8 +202,8 @@ TEST_F(HostUDFGroupbyExampleTest, EmptyInput) { auto const keys = int32s_col{}; auto const vals = doubles_col{}; - auto agg = - cudf::make_host_udf_aggregation(new host_udf_groupby_example()); + auto agg = cudf::make_host_udf_aggregation( + std::make_unique()); std::vector requests; requests.emplace_back(); diff --git a/cpp/tests/groupby/host_udf_tests.cpp b/cpp/tests/groupby/host_udf_tests.cpp index 754f03835a5..17da28cdefc 100644 --- a/cpp/tests/groupby/host_udf_tests.cpp +++ b/cpp/tests/groupby/host_udf_tests.cpp @@ -140,7 +140,7 @@ TEST_F(HostUDFTest, GroupbyBuiltinInput) auto const keys = int32s_col{0, 1, 2}; auto const vals = int32s_col{0, 1, 2}; auto agg = cudf::make_host_udf_aggregation( - new host_udf_groupby_test(__LINE__, &test_run, /*test_other_agg*/ false)); + std::make_unique(__LINE__, &test_run, /*test_other_agg*/ false)); std::vector requests; requests.emplace_back(); @@ -163,7 +163,7 @@ TEST_F(HostUDFTest, GroupbyWithCallingOtherAggregations) for (int i = 0; i < NUM_RANDOM_TESTS; ++i) { bool test_run = false; auto agg = cudf::make_host_udf_aggregation( - new host_udf_groupby_test(__LINE__, &test_run, /*test_other_agg*/ true)); + std::make_unique(__LINE__, &test_run, /*test_other_agg*/ true)); std::vector requests; requests.emplace_back(); diff --git a/cpp/tests/reductions/host_udf_example_tests.cu b/cpp/tests/reductions/host_udf_example_tests.cu index d4a9fece297..67b88c5306b 100644 --- a/cpp/tests/reductions/host_udf_example_tests.cu +++ b/cpp/tests/reductions/host_udf_example_tests.cu @@ -140,8 +140,8 @@ struct HostUDFReductionExampleTest : cudf::test::BaseFixture {}; TEST_F(HostUDFReductionExampleTest, SimpleInput) { auto const vals = doubles_col{0.0, 1.0, 2.0, 3.0, 4.0, 5.0}; - auto const agg = - cudf::make_host_udf_aggregation(new host_udf_reduction_example()); + auto const agg = cudf::make_host_udf_aggregation( + std::make_unique()); auto const reduced = cudf::reduce(vals, *agg, cudf::data_type{cudf::type_id::INT64}, @@ -158,8 +158,8 @@ TEST_F(HostUDFReductionExampleTest, SimpleInput) TEST_F(HostUDFReductionExampleTest, EmptyInput) { auto const vals = doubles_col{}; - auto const agg = - cudf::make_host_udf_aggregation(new host_udf_reduction_example()); + auto const agg = cudf::make_host_udf_aggregation( + std::make_unique()); auto const reduced = cudf::reduce(vals, *agg, cudf::data_type{cudf::type_id::INT64}, @@ -324,7 +324,7 @@ TEST_F(HostUDFSegmentedReductionExampleTest, SimpleInput) {true, false, true, true, false, true, false, false, true, true}}; auto const offsets = int32s_col{0, 3, 5, 10}.release(); auto const agg = cudf::make_host_udf_aggregation( - new host_udf_segmented_reduction_example()); + std::make_unique()); // Test without init value. { @@ -388,7 +388,7 @@ TEST_F(HostUDFSegmentedReductionExampleTest, EmptySegments) auto const vals = doubles_col{}; auto const offsets = int32s_col{0, 0, 0, 0}.release(); auto const agg = cudf::make_host_udf_aggregation( - new host_udf_segmented_reduction_example()); + std::make_unique()); auto const result = cudf::segmented_reduce( vals, cudf::device_span(offsets->view().begin(), offsets->size()), @@ -407,7 +407,7 @@ TEST_F(HostUDFSegmentedReductionExampleTest, EmptyInput) auto const vals = doubles_col{}; auto const offsets = int32s_col{}.release(); auto const agg = cudf::make_host_udf_aggregation( - new host_udf_segmented_reduction_example()); + std::make_unique()); auto const result = cudf::segmented_reduce( vals, cudf::device_span(offsets->view().begin(), offsets->size()), diff --git a/java/src/main/native/src/AggregationJni.cpp b/java/src/main/native/src/AggregationJni.cpp index b844e5fa69f..dd41c677761 100644 --- a/java/src/main/native/src/AggregationJni.cpp +++ b/java/src/main/native/src/AggregationJni.cpp @@ -307,8 +307,8 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Aggregation_createHostUDFAgg(JNIEnv* JNI_NULL_CHECK(env, udf_native_handle, "udf_native_handle is null", 0); try { cudf::jni::auto_set_device(env); - auto udf_ptr = reinterpret_cast(udf_native_handle); - auto output = cudf::make_host_udf_aggregation(udf_ptr); + auto const udf_ptr = reinterpret_cast(udf_native_handle); + auto output = cudf::make_host_udf_aggregation(udf_ptr->clone()); return reinterpret_cast(output.release()); } CATCH_STD(env, 0); From 70200378e7adf06648232c2300ec68bedcaadb66 Mon Sep 17 00:00:00 2001 From: Chong Gao Date: Wed, 22 Jan 2025 08:36:19 +0800 Subject: [PATCH 6/9] Update how to manage UDF instance --- .../main/java/ai/rapids/cudf/Aggregation.java | 13 ++++-- .../java/ai/rapids/cudf/HostUDFWrapper.java | 46 +++++++++++++++---- .../src/main/native/src/HostUDFWrapperJni.cpp | 34 ++++++++++++++ 3 files changed, 81 insertions(+), 12 deletions(-) create mode 100644 java/src/main/native/src/HostUDFWrapperJni.cpp diff --git a/java/src/main/java/ai/rapids/cudf/Aggregation.java b/java/src/main/java/ai/rapids/cudf/Aggregation.java index 66841d9b4ec..42fa871abeb 100644 --- a/java/src/main/java/ai/rapids/cudf/Aggregation.java +++ b/java/src/main/java/ai/rapids/cudf/Aggregation.java @@ -396,9 +396,16 @@ private HostUDFAggregation(HostUDFWrapper wrapper) { @Override long createNativeInstance() { - // The created host Agg instance takes the ownership of the passed in UDF instance. - // When a host Agg instance is released, the UDF instance in it is also released. - return Aggregation.createHostUDFAgg(wrapper.createUDFInstance()); + long udf = 0; + try { + udf = wrapper.createUDFInstance(); + return Aggregation.createHostUDFAgg(udf); + } finally { + // a new UDF is cloned in `createHostUDFAgg`, here should close the UDF instance. + if (udf != 0) { + HostUDFWrapper.closeUDFInstance(udf); + } + } } @Override diff --git a/java/src/main/java/ai/rapids/cudf/HostUDFWrapper.java b/java/src/main/java/ai/rapids/cudf/HostUDFWrapper.java index 67c0d829a91..f0ef57e6089 100644 --- a/java/src/main/java/ai/rapids/cudf/HostUDFWrapper.java +++ b/java/src/main/java/ai/rapids/cudf/HostUDFWrapper.java @@ -19,24 +19,52 @@ /** * A wrapper around native host UDF aggregations. *

- * This class is used to store the native handle of a host UDF aggregation and is used as + * This class is used to create the native handle of a host UDF aggregation and is used as * a proxy object to compute hash code and compare two host UDF aggregations for equality. *

* A new host UDF aggregation implementation must extend this class and override the - * {@code hashCode} and {@code equals} methods for such purposes. + * {@code computeHashCode} and {@code isEqual} methods for such purposes. * */ public abstract class HostUDFWrapper { /** - * Call into native code and create a derived host UDF instance. - * Note: This function MUST only be called in `HostUDFAggregation.createNativeInstance`, - * Then the aggregation instance created by `HostUDFAggregation.createNativeInstance` owns this UDF - * instance. This host UDF instance will be deleted when the aggregation instance is deleted. The - * aggregation instance is responsible for the lifetime of the host UDF instance. The lifetime of - * the aggregation instance is handle by the framework, e.g.: in the finally block of - * Table.aggregate, it calls `Aggregation.close(aggOperationInstances)` + * Create a derived host UDF native instance. + * The instance created by this function MUST be closed by `closeUDFInstance` + *

Typical usage, refer to Aggregation.java:

+ *
+   * long udf = 0;
+   * try {
+   *     udf = wrapper.createUDFInstance();
+   *     return Aggregation.createHostUDFAgg(udf);
+   * } finally {
+   *     // a new UDF is cloned in `createHostUDFAgg`, here should close the UDF instance.
+   *     if (udf != 0) {
+   *         HostUDFWrapper.closeUDFInstance(udf);
+   *     }
+   * }
+   * 
* */ abstract long createUDFInstance(); + + /** + * Close the UDF instance created by `createUDFInstance` + * @param hostUDFInstance the UDF instance + */ + static native void closeUDFInstance(long hostUDFInstance); + + abstract int computeHashCode(); + + @Override + public int hashCode() { + return computeHashCode(); + } + + abstract boolean isEqual(Object obj); + + @Override + public boolean equals(Object obj) { + return isEqual(obj); + } } diff --git a/java/src/main/native/src/HostUDFWrapperJni.cpp b/java/src/main/native/src/HostUDFWrapperJni.cpp new file mode 100644 index 00000000000..31cb3f93b43 --- /dev/null +++ b/java/src/main/native/src/HostUDFWrapperJni.cpp @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "cudf_jni_apis.hpp" + +#include + +extern "C" { + +JNIEXPORT void JNICALL Java_ai_rapids_cudf_HostUDFWrapper_closeUDFInstance(JNIEnv* env, + jclass class_object, + jlong ptr) +{ + try { + auto to_del = reinterpret_cast(ptr); + delete to_del; + } + CATCH_STD(env, ); +} + +} // extern "C" From 5444332b66f7c732b01e4f21eca080fc19abad5d Mon Sep 17 00:00:00 2001 From: Chong Gao Date: Wed, 22 Jan 2025 09:40:38 +0800 Subject: [PATCH 7/9] Update Copyright year --- java/src/main/java/ai/rapids/cudf/HostUDFWrapper.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/java/src/main/java/ai/rapids/cudf/HostUDFWrapper.java b/java/src/main/java/ai/rapids/cudf/HostUDFWrapper.java index f0ef57e6089..112d4a0f30c 100644 --- a/java/src/main/java/ai/rapids/cudf/HostUDFWrapper.java +++ b/java/src/main/java/ai/rapids/cudf/HostUDFWrapper.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. From 2f5a0dc35170081d637bd8f1ad32f10fe12eebeb Mon Sep 17 00:00:00 2001 From: Chong Gao Date: Wed, 22 Jan 2025 11:36:48 +0800 Subject: [PATCH 8/9] Fix --- .../main/java/ai/rapids/cudf/HostUDFWrapper.java | 15 ++++++++++----- java/src/main/native/CMakeLists.txt | 1 + java/src/main/native/src/HostUDFWrapperJni.cpp | 6 +++--- 3 files changed, 14 insertions(+), 8 deletions(-) diff --git a/java/src/main/java/ai/rapids/cudf/HostUDFWrapper.java b/java/src/main/java/ai/rapids/cudf/HostUDFWrapper.java index 112d4a0f30c..6fe1e859e2c 100644 --- a/java/src/main/java/ai/rapids/cudf/HostUDFWrapper.java +++ b/java/src/main/java/ai/rapids/cudf/HostUDFWrapper.java @@ -46,25 +46,30 @@ public abstract class HostUDFWrapper { * * */ - abstract long createUDFInstance(); + public abstract long createUDFInstance(); /** - * Close the UDF instance created by `createUDFInstance` + * Close the derived UDF instance created by `createUDFInstance`. * @param hostUDFInstance the UDF instance */ - static native void closeUDFInstance(long hostUDFInstance); + public static void closeUDFInstance(long hostUDFInstance) { + close(hostUDFInstance); + } - abstract int computeHashCode(); + public abstract int computeHashCode(); @Override public int hashCode() { return computeHashCode(); } - abstract boolean isEqual(Object obj); + public abstract boolean isEqual(Object obj); @Override public boolean equals(Object obj) { return isEqual(obj); } + + static native void close(long hostUDFInstance); } + diff --git a/java/src/main/native/CMakeLists.txt b/java/src/main/native/CMakeLists.txt index bd1714aa476..3923d8b45e3 100644 --- a/java/src/main/native/CMakeLists.txt +++ b/java/src/main/native/CMakeLists.txt @@ -148,6 +148,7 @@ add_library( src/DataSourceHelperJni.cpp src/HashJoinJni.cpp src/HostMemoryBufferNativeUtilsJni.cpp + src/HostUDFWrapperJni.cpp src/NvcompJni.cpp src/NvtxRangeJni.cpp src/NvtxUniqueRangeJni.cpp diff --git a/java/src/main/native/src/HostUDFWrapperJni.cpp b/java/src/main/native/src/HostUDFWrapperJni.cpp index 31cb3f93b43..1dd1b483fac 100644 --- a/java/src/main/native/src/HostUDFWrapperJni.cpp +++ b/java/src/main/native/src/HostUDFWrapperJni.cpp @@ -20,9 +20,9 @@ extern "C" { -JNIEXPORT void JNICALL Java_ai_rapids_cudf_HostUDFWrapper_closeUDFInstance(JNIEnv* env, - jclass class_object, - jlong ptr) +JNIEXPORT void JNICALL Java_ai_rapids_cudf_HostUDFWrapper_close(JNIEnv* env, + jclass class_object, + jlong ptr) { try { auto to_del = reinterpret_cast(ptr); From 062a493b8d347a33b9c0a08d256dd3f81766c61a Mon Sep 17 00:00:00 2001 From: Chong Gao Date: Wed, 22 Jan 2025 13:20:56 +0800 Subject: [PATCH 9/9] Fix code style --- java/src/main/java/ai/rapids/cudf/HostUDFWrapper.java | 1 - 1 file changed, 1 deletion(-) diff --git a/java/src/main/java/ai/rapids/cudf/HostUDFWrapper.java b/java/src/main/java/ai/rapids/cudf/HostUDFWrapper.java index 6fe1e859e2c..0bd3ccad0d3 100644 --- a/java/src/main/java/ai/rapids/cudf/HostUDFWrapper.java +++ b/java/src/main/java/ai/rapids/cudf/HostUDFWrapper.java @@ -72,4 +72,3 @@ public boolean equals(Object obj) { static native void close(long hostUDFInstance); } -