From 49aa494813b57f691274a2d9327cb9cb3fe00649 Mon Sep 17 00:00:00 2001 From: Julie Tibshirani Date: Tue, 20 Aug 2019 22:54:06 -0700 Subject: [PATCH] Ensure quantile forests give consistent estimates across platforms. We now include a tie-breaker while sorting the list of sample IDs and outcomes. Without the tie-breaker, the sort could produce a different element ordering across different runs of the algorithm. --- .../prediction/QuantilePredictionStrategy.cpp | 6 +++++- .../resources/quantile_oob_predictions.csv | 18 +++++++++--------- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/core/src/prediction/QuantilePredictionStrategy.cpp b/core/src/prediction/QuantilePredictionStrategy.cpp index 511126495..d9556bb90 100644 --- a/core/src/prediction/QuantilePredictionStrategy.cpp +++ b/core/src/prediction/QuantilePredictionStrategy.cpp @@ -52,7 +52,11 @@ std::vector QuantilePredictionStrategy::compute_quantile_cutoffs( std::sort(samples_and_values.begin(), samples_and_values.end(), [](std::pair first_pair, std::pair second_pair) { - return first_pair.second < second_pair.second; + // Note: we add a tie-breaker here to ensure that this sort consistently produces the + // same element ordering. Otherwise, different runs of the algorithm could result in + // different quantile predictions on the same data. + return first_pair.second < second_pair.second + || (first_pair.second == second_pair.second && first_pair.first < second_pair.first); }); std::vector quantile_cutoffs; diff --git a/core/test/forest/resources/quantile_oob_predictions.csv b/core/test/forest/resources/quantile_oob_predictions.csv index da10cfe6b..534c7d97f 100644 --- a/core/test/forest/resources/quantile_oob_predictions.csv +++ b/core/test/forest/resources/quantile_oob_predictions.csv @@ -121,7 +121,7 @@ -10, -10, 10 -10, -10, 10 -1, -1, 1 --1, -1, 1 +-1, 1, 1 -1, -1, 1 10, 10, 10 -10, -10, -10 @@ -410,7 +410,7 @@ -1, -1, 1 -1, -1, 1 -1, -1, 1 --10, -10, 10 +-10, 10, 10 -1, -1, 1 10, 10, 10 -1, -1, 1 @@ -421,7 +421,7 @@ -10, 10, 10 -1, -1, 1 -1, -1, 1 --1, -1, 1 +-1, 1, 1 -1, -1, 1 -1, 1, 1 -10, 10, 10 @@ -459,7 +459,7 @@ -1, -1, 1 -10, 10, 10 -10, -10, 10 --10, 10, 10 +-10, -10, 10 -1, -1, 1 -1, -1, 1 -10, -1, 10 @@ -537,7 +537,7 @@ -10, 10, 10 -1, 1, 1 -1, -1, 1 --10, 10, 10 +-10, -10, 10 -10, -10, 10 -1, 1, 1 -1, 1, 1 @@ -589,7 +589,7 @@ -1, 1, 1 -1, -1, 1 -10, 10, 10 --1, -1, 1 +-1, 1, 1 -1, 1, 1 -1, -1, 1 -10, -10, 10 @@ -832,7 +832,7 @@ -10, -10, 10 -1, 1, 1 -10, -10, -10 --1, 1, 1 +-1, -1, 1 -1, 1, 1 -1, -1, 1 -10, -10, 10 @@ -847,7 +847,7 @@ -1, -1, 1 -1, -1, 1 -1, -1, 1 --10, -10, 10 +-10, 10, 10 -10, -10, 10 -10, 10, 10 -1, -1, 1 @@ -857,7 +857,7 @@ 10, 10, 10 -10, -10, 1 -10, 10, 10 --10, -1, 10 +-10, -10, 10 -1, -1, -1 -1, 1, 1 -1, 1, 1