From 50b0a66658cd99dea39f6664b64e6a3c9c010e12 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Tue, 18 Feb 2025 17:59:05 +0100 Subject: [PATCH] GH-45572: [C++][Compute] Add rank_normal function --- cpp/src/arrow/CMakeLists.txt | 1 + cpp/src/arrow/compute/kernels/vector_rank.cc | 74 +++++++--- .../arrow/compute/kernels/vector_sort_test.cc | 111 ++++++++++++--- cpp/src/arrow/util/CMakeLists.txt | 1 + cpp/src/arrow/util/math_internal.cc | 130 ++++++++++++++++++ cpp/src/arrow/util/math_internal.h | 36 +++++ cpp/src/arrow/util/math_test.cc | 82 +++++++++++ docs/source/cpp/compute.rst | 7 +- 8 files changed, 405 insertions(+), 37 deletions(-) create mode 100644 cpp/src/arrow/util/math_internal.cc create mode 100644 cpp/src/arrow/util/math_internal.h create mode 100644 cpp/src/arrow/util/math_test.cc diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt index eb9860b240f16..b9b8785cbc80a 100644 --- a/cpp/src/arrow/CMakeLists.txt +++ b/cpp/src/arrow/CMakeLists.txt @@ -529,6 +529,7 @@ set(ARROW_UTIL_SRCS util/logger.cc util/logging.cc util/key_value_metadata.cc + util/math_internal.cc util/memory.cc util/mutex.cc util/ree_util.cc diff --git a/cpp/src/arrow/compute/kernels/vector_rank.cc b/cpp/src/arrow/compute/kernels/vector_rank.cc index 2efc61c2e6ce5..d1323c2030223 100644 --- a/cpp/src/arrow/compute/kernels/vector_rank.cc +++ b/cpp/src/arrow/compute/kernels/vector_rank.cc @@ -21,6 +21,7 @@ #include "arrow/compute/function.h" #include "arrow/compute/kernels/vector_sort_internal.h" #include "arrow/compute/registry.h" +#include "arrow/util/math_internal.h" namespace arrow::compute::internal { @@ -62,16 +63,6 @@ void MarkDuplicates(const NullPartitionResult& sorted, ValueSelector&& value_sel } } -const RankOptions* GetDefaultRankOptions() { - static const auto kDefaultRankOptions = RankOptions::Defaults(); - return &kDefaultRankOptions; -} - -const RankQuantileOptions* GetDefaultQuantileRankOptions() { - static const auto kDefaultQuantileRankOptions = 
RankQuantileOptions::Defaults(); - return &kDefaultQuantileRankOptions; -} - template Result DoSortAndMarkDuplicate( ExecContext* ctx, uint64_t* indices_begin, uint64_t* indices_end, const Array& input, @@ -164,8 +155,9 @@ class SortAndMarkDuplicate : public TypeVisitor { NullPartitionResult sorted_{}; }; -// A helper class that emits rankings for the "rank_quantile" function -struct QuantileRanker { +// A CRTP-based helper class for "rank_normal" and "rank_quantile" +template +struct BaseQuantileRanker { Result CreateRankings(ExecContext* ctx, const NullPartitionResult& sorted) { const int64_t length = sorted.overall_end() - sorted.overall_begin(); ARROW_ASSIGN_OR_RAISE(auto rankings, @@ -187,10 +179,11 @@ struct QuantileRanker { } // The run length, i.e. the frequency of the current value int64_t freq = run_end - it; - double quantile = (cum_freq + 0.5 * freq) / static_cast(length); + const double quantile = (cum_freq + 0.5 * freq) / static_cast(length); + const double value = Derived::TransformValue(quantile); // Output quantile rank values for (; it < run_end; ++it) { - out_begin[original_index(*it)] = quantile; + out_begin[original_index(*it)] = value; } cum_freq += freq; } @@ -199,6 +192,18 @@ struct QuantileRanker { } }; +// A derived class that emits rankings for the "rank_quantile" function +struct QuantileRanker : public BaseQuantileRanker { + static double TransformValue(double quantile) { return quantile; } +}; + +// A derived class that emits rankings for the "rank_normal" function +struct NormalRanker : public BaseQuantileRanker { + static double TransformValue(double quantile) { + return ::arrow::internal::NormalPPF(quantile); + } +}; + // A helper class that emits rankings for the "rank" function struct OrdinalRanker { explicit OrdinalRanker(RankOptions::Tiebreaker tiebreaker) : tiebreaker_(tiebreaker) {} @@ -294,6 +299,20 @@ const FunctionDoc rank_quantile_doc( "The handling of nulls and NaNs can be changed in RankQuantileOptions."), {"input"}, 
"RankQuantileOptions"); +const FunctionDoc rank_normal_doc( + "Compute normal (gaussian) ranks of an array", + ("This function computes a normal (gaussian) rank of the input array.\n" + "By default, null values are considered greater than any other value and\n" + "are therefore sorted at the end of the input. For floating-point types,\n" + "NaNs are considered greater than any other non-null value, but smaller\n" + "than null values.\n" + "The results are finite real values. They are obtained as if first\n" + "calling the \"rank_quantile\" function and then applying the normal\n" + "percent-point function (PPF) to the resulting quantile values.\n" + "\n" + "The handling of nulls and NaNs can be changed in RankQuantileOptions."), + {"input"}, "RankQuantileOptions"); + template class RankMetaFunctionBase : public MetaFunction { public: @@ -361,11 +380,14 @@ class RankMetaFunction : public RankMetaFunctionBase { } RankMetaFunction() - : RankMetaFunctionBase("rank", Arity::Unary(), rank_doc, GetDefaultRankOptions()) {} + : RankMetaFunctionBase("rank", Arity::Unary(), rank_doc, &kDefaultOptions) {} + + static inline const auto kDefaultOptions = RankOptions::Defaults(); }; class RankQuantileMetaFunction : public RankMetaFunctionBase { public: + using Base = RankMetaFunctionBase; using FunctionOptionsType = RankQuantileOptions; using RankerType = QuantileRanker; @@ -375,7 +397,26 @@ class RankQuantileMetaFunction : public RankMetaFunctionBase { + public: + using Base = RankMetaFunctionBase; + using FunctionOptionsType = RankQuantileOptions; + using RankerType = NormalRanker; + + static bool NeedsDuplicates(const RankQuantileOptions&) { return true; } + + static RankerType GetRanker(const RankQuantileOptions& options) { return RankerType(); } + + RankNormalMetaFunction() + : RankMetaFunctionBase("rank_normal", Arity::Unary(), rank_normal_doc, + &kDefaultOptions) {} + + static inline const auto kDefaultOptions = RankQuantileOptions::Defaults(); }; } // namespace @@ -383,6 
+424,7 @@ class RankQuantileMetaFunction : public RankMetaFunctionBaseAddFunction(std::make_shared())); DCHECK_OK(registry->AddFunction(std::make_shared())); + DCHECK_OK(registry->AddFunction(std::make_shared())); } } // namespace arrow::compute::internal diff --git a/cpp/src/arrow/compute/kernels/vector_sort_test.cc b/cpp/src/arrow/compute/kernels/vector_sort_test.cc index dc1c055705d56..2b592cd1a9260 100644 --- a/cpp/src/arrow/compute/kernels/vector_sort_test.cc +++ b/cpp/src/arrow/compute/kernels/vector_sort_test.cc @@ -2205,7 +2205,7 @@ TEST_F(TestNestedSortIndices, SortRecordBatch) { TestSort(GetRecordBatch()); } TEST_F(TestNestedSortIndices, SortTable) { TestSort(GetTable()); } // ---------------------------------------------------------------------- -// Tests for Rank and Quantile Rank +// Tests for Rank, Quantile Rank and Normal Rank class BaseTestRank : public ::testing::Test { protected: @@ -2471,43 +2471,84 @@ TEST_F(TestRank, EmptyChunks) { class TestRankQuantile : public BaseTestRank { public: - void AssertRankQuantile(const DatumVector& datums, SortOrder order, - NullPlacement null_placement, - const std::shared_ptr& expected) { + void AssertRankQuantileGeneric(const std::string& function_name, + const DatumVector& datums, SortOrder order, + NullPlacement null_placement, + const std::shared_ptr& expected) { + ARROW_SCOPED_TRACE("function = ", function_name); const std::vector sort_keys{SortKey("foo", order)}; RankQuantileOptions options(sort_keys, null_placement); ARROW_SCOPED_TRACE("options = ", options.ToString()); for (const auto& datum : datums) { - ASSERT_OK_AND_ASSIGN(auto actual, CallFunction("rank_quantile", {datum}, &options)); + ASSERT_OK_AND_ASSIGN(auto actual, CallFunction(function_name, {datum}, &options)); ValidateOutput(actual); - AssertDatumsEqual(expected, actual, /*verbose=*/true); + if (function_name == "rank_normal") { + // Normal PPF results can only be approximate + auto equal_options = EqualOptions().atol(1e-8); + 
AssertDatumsApproxEqual(expected, actual, /*verbose=*/true, equal_options); + } else { + AssertDatumsEqual(expected, actual, /*verbose=*/true); + } } } - void AssertRankQuantile(const DatumVector& datums, SortOrder order, - NullPlacement null_placement, const std::string& expected) { - AssertRankQuantile(datums, order, null_placement, ArrayFromJSON(float64(), expected)); + void AssertRankQuantileGeneric(const std::string& function_name, const Datum& datum, + SortOrder order, NullPlacement null_placement, + const std::shared_ptr& expected) { + AssertRankQuantileGeneric(function_name, DatumVector{datum}, order, null_placement, + expected); } - void AssertRankQuantile(SortOrder order, NullPlacement null_placement, - const std::shared_ptr& expected) { - AssertRankQuantile(datums_, order, null_placement, expected); + void AssertRankQuantileGeneric(const std::string& function_name, + const DatumVector& datums, SortOrder order, + NullPlacement null_placement, + const std::string& expected) { + AssertRankQuantileGeneric(function_name, datums, order, null_placement, + ArrayFromJSON(float64(), expected)); } - void AssertRankQuantile(SortOrder order, NullPlacement null_placement, - const std::string& expected) { - AssertRankQuantile(datums_, order, null_placement, - ArrayFromJSON(float64(), expected)); + void AssertRankQuantileGeneric(const std::string& function_name, const Datum& datum, + SortOrder order, NullPlacement null_placement, + const std::string& expected) { + AssertRankQuantileGeneric(function_name, DatumVector{datum}, order, null_placement, + ArrayFromJSON(float64(), expected)); + } + + void AssertRankQuantileGeneric(const std::string& function_name, SortOrder order, + NullPlacement null_placement, + const std::shared_ptr& expected) { + AssertRankQuantileGeneric(function_name, datums_, order, null_placement, expected); + } + + void AssertRankQuantileGeneric(const std::string& function_name, SortOrder order, + NullPlacement null_placement, + const std::string& 
expected) { + AssertRankQuantileGeneric(function_name, datums_, order, null_placement, + ArrayFromJSON(float64(), expected)); + } + + template + void AssertRankQuantile(Args&&... args) { + AssertRankQuantileGeneric("rank_quantile", std::forward(args)...); + } + + template + void AssertRankNormal(Args&&... args) { + AssertRankQuantileGeneric("rank_normal", std::forward(args)...); } void AssertRankQuantileEmpty(std::shared_ptr type) { for (auto null_placement : AllNullPlacements()) { for (auto order : AllOrders()) { - AssertRankQuantile({ArrayFromJSON(type, "[]")}, order, null_placement, "[]"); - AssertRankQuantile({ArrayFromJSON(type, "[null]")}, order, null_placement, - "[0.5]"); - AssertRankQuantile({ArrayFromJSON(type, "[null, null, null]")}, order, + AssertRankQuantile(ArrayFromJSON(type, "[]"), order, null_placement, "[]"); + AssertRankQuantile(ArrayFromJSON(type, "[null]"), order, null_placement, "[0.5]"); + AssertRankQuantile(ArrayFromJSON(type, "[null, null, null]"), order, null_placement, "[0.5, 0.5, 0.5]"); + + AssertRankNormal(ArrayFromJSON(type, "[]"), order, null_placement, "[]"); + AssertRankNormal(ArrayFromJSON(type, "[null]"), order, null_placement, "[0.0]"); + AssertRankNormal(ArrayFromJSON(type, "[null, null, null]"), order, null_placement, + "[0.0, 0.0, 0.0]"); } } } @@ -2519,6 +2560,12 @@ class TestRankQuantile : public BaseTestRank { "[0.3, 0.8, 0.3, 0.8, 0.3]"); AssertRankQuantile(SortOrder::Descending, null_placement, "[0.7, 0.2, 0.7, 0.2, 0.7]"); + AssertRankNormal(SortOrder::Ascending, null_placement, + "[-0.5244005127080409, 0.8416212335729143, -0.5244005127080409, " + "0.8416212335729143, -0.5244005127080409]"); + AssertRankNormal(SortOrder::Descending, null_placement, + "[0.5244005127080407, -0.8416212335729142, 0.5244005127080407, " + "-0.8416212335729142, 0.5244005127080407]"); } } @@ -2532,6 +2579,19 @@ class TestRankQuantile : public BaseTestRank { "[0.3, 0.9, 0.3, 0.7, 0.3]"); AssertRankQuantile(SortOrder::Descending, 
NullPlacement::AtEnd, "[0.7, 0.3, 0.7, 0.1, 0.7]"); + + AssertRankNormal(SortOrder::Ascending, NullPlacement::AtStart, + "[-0.5244005127080409, 0.5244005127080407, -0.5244005127080409, " + "1.2815515655446004, -0.5244005127080409]"); + AssertRankNormal(SortOrder::Ascending, NullPlacement::AtEnd, + "[0.5244005127080407, -1.2815515655446004, 0.5244005127080407, " + "-0.5244005127080409, 0.5244005127080407]"); + AssertRankNormal(SortOrder::Descending, NullPlacement::AtStart, + "[-0.5244005127080409, 1.2815515655446004, -0.5244005127080409, " + "0.5244005127080407, -0.5244005127080409]"); + AssertRankNormal(SortOrder::Descending, NullPlacement::AtEnd, + "[0.5244005127080407, -0.5244005127080409, 0.5244005127080407, " + "-1.2815515655446004, 0.5244005127080407]"); } void AssertRankQuantileNumeric(std::shared_ptr type) { @@ -2545,6 +2605,17 @@ class TestRankQuantile : public BaseTestRank { "[0.95, 0.8, 0.8, 0.6, 0.6, 0.35, 0.35, 0.35, 0.15, 0.05]"); AssertRankQuantile(SortOrder::Descending, null_placement, "[0.05, 0.2, 0.2, 0.4, 0.4, 0.65, 0.65, 0.65, 0.85, 0.95]"); + + AssertRankNormal(SortOrder::Ascending, null_placement, + "[1.6448536269514722, 0.8416212335729143, 0.8416212335729143, " + "0.2533471031357997, 0.2533471031357997, -0.38532046640756773, " + "-0.38532046640756773, -0.38532046640756773, -1.0364333894937898, " + "-1.6448536269514729]"); + AssertRankNormal(SortOrder::Descending, null_placement, + "[-1.6448536269514729, -0.8416212335729142, -0.8416212335729142, " + "-0.2533471031357997, -0.2533471031357997, 0.38532046640756773, " + "0.38532046640756773, 0.38532046640756773, 1.0364333894937898, " + "1.6448536269514722]"); } // With nulls diff --git a/cpp/src/arrow/util/CMakeLists.txt b/cpp/src/arrow/util/CMakeLists.txt index 24a1c1177240d..17eea5532cc91 100644 --- a/cpp/src/arrow/util/CMakeLists.txt +++ b/cpp/src/arrow/util/CMakeLists.txt @@ -66,6 +66,7 @@ add_arrow_test(utility-test list_util_test.cc logger_test.cc logging_test.cc + math_test.cc queue_test.cc 
range_test.cc ree_util_test.cc diff --git a/cpp/src/arrow/util/math_internal.cc b/cpp/src/arrow/util/math_internal.cc new file mode 100644 index 0000000000000..604af45a49649 --- /dev/null +++ b/cpp/src/arrow/util/math_internal.cc @@ -0,0 +1,130 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "arrow/util/math_internal.h" + +#include + +#include "arrow/util/logging.h" + +namespace arrow::internal { + +double NormalPPF(double p) { + DCHECK(p >= 0.0 && p <= 1.0); + if (p == 0.0) { + return -HUGE_VAL; + } + if (p == 1.0) { + return HUGE_VAL; + } + + // Algorithm from https://doi.org/10.2307/2347330 + // Wichura, M. J. (1988). + // Algorithm AS 241: The Percentage Points of the Normal Distribution. + // Journal of the Royal Statistical Society. Series C (Applied Statistics), + // 37(3), 477-484. 
+ // + // Copied from the Rust implementation at https://github.com/ankane/dist-rust/ + double q = p - 0.5; + if (std::abs(q) < 0.425) { + double r = 0.180625 - q * q; + return q * + (((((((2.5090809287301226727e3 * r + 3.3430575583588128105e4) * r + + 6.7265770927008700853e4) * + r + + 4.5921953931549871457e4) * + r + + 1.3731693765509461125e4) * + r + + 1.9715909503065514427e3) * + r + + 1.3314166789178437745e2) * + r + + 3.3871328727963666080e0) / + (((((((5.2264952788528545610e3 * r + 2.8729085735721942674e4) * r + + 3.9307895800092710610e4) * + r + + 2.1213794301586595867e4) * + r + + 5.3941960214247511077e3) * + r + + 6.8718700749205790830e2) * + r + + 4.2313330701600911252e1) * + r + + 1.0); + } else { + double r = q < 0.0 ? p : 1.0 - p; + r = std::sqrt(-std::log(r)); + if (r < 5.0) { + r -= 1.6; + r = (((((((7.74545014278341407640e-4 * r + 2.27238449892691845833e-2) * r + + 2.41780725177450611770e-1) * + r + + 1.27045825245236838258e0) * + r + + 3.64784832476320460504e0) * + r + + 5.76949722146069140550e0) * + r + + 4.63033784615654529590e0) * + r + + 1.42343711074968357734e0) / + (((((((1.05075007164441684324e-9 * r + 5.47593808499534494600e-4) * r + + 1.51986665636164571966e-2) * + r + + 1.48103976427480074590e-1) * + r + + 6.89767334985100004550e-1) * + r + + 1.67638483018380384940e0) * + r + + 2.05319162663775882187e0) * + r + + 1.0); + } else { + r -= 5.0; + r = (((((((2.01033439929228813265e-7 * r + 2.71155556874348757815e-5) * r + + 1.24266094738807843860e-3) * + r + + 2.65321895265761230930e-2) * + r + + 2.96560571828504891230e-1) * + r + + 1.78482653991729133580e0) * + r + + 5.46378491116411436990e0) * + r + + 6.65790464350110377720e0) / + (((((((2.04426310338993978564e-15 * r + 1.42151175831644588870e-7) * r + + 1.84631831751005468180e-5) * + r + + 7.86869131145613259100e-4) * + r + + 1.48753612908506148525e-2) * + r + + 1.36929880922735805310e-1) * + r + + 5.99832206555887937690e-1) * + r + + 1.0); + } + return std::copysign(r, q); + } +} + +} // 
namespace arrow::internal diff --git a/cpp/src/arrow/util/math_internal.h b/cpp/src/arrow/util/math_internal.h new file mode 100644 index 0000000000000..db8856e708271 --- /dev/null +++ b/cpp/src/arrow/util/math_internal.h @@ -0,0 +1,36 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include "arrow/util/macros.h" +#include "arrow/util/visibility.h" + +namespace arrow::internal { + +/// \brief Percent-point / quantile function (PPF) of the normal distribution. +/// +/// Given p in [0, 1], return the corresponding quantile value in the normal +/// distribution. This is the inverse of the cumulative distribution function. +/// +/// If p is not in [0, 1], behavior is undefined. +/// +/// This function is sometimes also called the probit function. +ARROW_EXPORT +double NormalPPF(double p); + +} // namespace arrow::internal diff --git a/cpp/src/arrow/util/math_test.cc b/cpp/src/arrow/util/math_test.cc new file mode 100644 index 0000000000000..2a801a4aa112a --- /dev/null +++ b/cpp/src/arrow/util/math_test.cc @@ -0,0 +1,82 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements.
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include + +#include + +#include "arrow/testing/gtest_util.h" +#include "arrow/util/math_internal.h" + +namespace arrow::internal { + +TEST(NormalPPF, Basics) { + struct PPFTestCase { + double input; + double expected; + }; + // Test vectors obtained using Scipy's norm.ppf + std::vector cases = { + {0.0, -HUGE_VAL}, + {0.001, -3.090232306167813}, + {0.01, -2.3263478740408408}, + {0.02, -2.053748910631823}, + {0.03, -1.880793608151251}, + {0.04, -1.75068607125217}, + {0.05, -1.6448536269514729}, + {0.06, -1.5547735945968535}, + {0.07, -1.4757910281791706}, + {0.08, -1.4050715603096329}, + {0.09, -1.3407550336902165}, + {0.1, -1.2815515655446004}, + {0.2, -0.8416212335729142}, + {0.3, -0.5244005127080409}, + {0.4, -0.2533471031357997}, + {0.5, 0.0}, + {0.6, 0.2533471031357997}, + {0.7, 0.5244005127080407}, + {0.8, 0.8416212335729143}, + {0.9, 1.2815515655446004}, + {0.91, 1.3407550336902165}, + {0.92, 1.4050715603096329}, + {0.93, 1.475791028179171}, + {0.94, 1.5547735945968535}, + {0.95, 1.6448536269514722}, + {0.96, 1.7506860712521692}, + {0.97, 1.8807936081512509}, + {0.98, 2.0537489106318225}, + {0.99, 2.3263478740408408}, + {0.999, 3.090232306167813}, + {1.0, HUGE_VAL}, + }; + for (auto test_case : cases) { + ARROW_SCOPED_TRACE("p = ", test_case.input); + 
EXPECT_DOUBLE_EQ(NormalPPF(test_case.input), test_case.expected); + } + // Test vectors from https://doi.org/10.2307/2347330 + cases = { + {0.25, -0.6744897501960817}, + {0.001, -3.090232306167814}, + {1e-20, -9.262340089798408}, + }; + for (auto test_case : cases) { + ARROW_SCOPED_TRACE("p = ", test_case.input); + EXPECT_DOUBLE_EQ(NormalPPF(test_case.input), test_case.expected); + } +} + +} // namespace arrow::internal diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst index 6acc9e31a5ffd..2a2a01c331962 100644 --- a/docs/source/cpp/compute.rst +++ b/docs/source/cpp/compute.rst @@ -1805,6 +1805,8 @@ in the respective option classes. +-----------------------+------------+---------------------------------------------------------+-------------------+-------------------------------+----------------+ | rank | Unary | Boolean, Numeric, Temporal, Binary- and String-like | UInt64 | :struct:`RankOptions` | \(4) | +-----------------------+------------+---------------------------------------------------------+-------------------+-------------------------------+----------------+ +| rank_normal | Unary | Boolean, Numeric, Temporal, Binary- and String-like | Float64 | :struct:`RankQuantileOptions` | \(5) | ++-----------------------+------------+---------------------------------------------------------+-------------------+-------------------------------+----------------+ | rank_quantile | Unary | Boolean, Numeric, Temporal, Binary- and String-like | Float64 | :struct:`RankQuantileOptions` | \(5) | +-----------------------+------------+---------------------------------------------------------+-------------------+-------------------------------+----------------+ | select_k_unstable | Unary | Boolean, Numeric, Temporal, Binary- and String-like | UInt64 | :struct:`SelectKOptions` | \(6) \(7) | @@ -1827,7 +1829,10 @@ in the respective option classes. * \(4) The output is a one-based numerical array of ranks. 
-* \(5) The output is an array of quantiles strictly between 0 and 1. +* \(5) The output of ``rank_quantile`` is an array of quantiles strictly between + 0 and 1. The output of ``rank_normal`` is an array of finite real values + corresponding to points in the normal distribution that reflect the input's + quantile ranks. * \(6) The input can be an array, chunked array, record batch or table. If the input is a record batch or table, one or more sort