diff --git a/.travis.yml b/.travis.yml index 0329a8a53b..fb5d43fec3 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,8 +7,6 @@ r_binary_packages: - devtools - testthat - roxygen2 -r_github_packages: - - jimhester/lintr matrix: include: @@ -64,7 +62,6 @@ matrix: - valgrind --leak-check=full --error-exitcode=1 ./build/grf exclude:[characterization] - name: "grf R package" - env: LINTR_COMMENT_BOT=false before_install: - cd r-package/grf script: diff --git a/DEVELOPING.md b/DEVELOPING.md index fd79e5ba61..2da76dbd77 100644 --- a/DEVELOPING.md +++ b/DEVELOPING.md @@ -16,8 +16,7 @@ The core forest implementation is written in C++, with an R interface powered by ## R package -To build the R package from source, cd into `r-package` and run `build_package.R`. Required development dependencies are listed there -(note: it is recommended to install the latest [lintr](https://github.com/jimhester/lintr) package with `devtools::install_github("jimhester/lintr")`). This mimics the tests run when submitting a pull request. +To build the R package from source, cd into `r-package` and run `build_package.R`. Required development dependencies are listed there. This mimics the tests run when submitting a pull request. An alternative development workflow is to use the accompanying grf.Rproj and build and test the package with RStudio's build menu, which can be convenient for quickly iterating C++/R code changes. diff --git a/core/src/commons/utility.cpp b/core/src/commons/utility.cpp index d9117d8d32..e4b4e06bef 100644 --- a/core/src/commons/utility.cpp +++ b/core/src/commons/utility.cpp @@ -20,7 +20,6 @@ #include #include #include -#include #include "utility.h" #include "SparseData.h" diff --git a/core/src/commons/utility.h b/core/src/commons/utility.h index 6ff539fcb7..390863a7e0 100644 --- a/core/src/commons/utility.h +++ b/core/src/commons/utility.h @@ -22,7 +22,6 @@ #include #include #include -#include #include #include diff --git a/core/src/forest/ForestTrainer.cpp b/core/src/forest/ForestTrainer.cpp index ac4b4430f5..86aa807c83 100644 --- a/core/src/forest/ForestTrainer.cpp +++ b/core/src/forest/ForestTrainer.cpp @@ -23,6 +23,7 @@ #include "commons/utility.h" #include "ForestTrainer.h" +#include "random/random.hpp" ForestTrainer::ForestTrainer(std::shared_ptr relabeling_strategy, std::shared_ptr splitting_rule_factory, @@ -87,7 +88,7 @@ std::vector> ForestTrainer::train_batch( size_t ci_group_size = options.get_ci_group_size(); std::mt19937_64 random_number_generator(options.get_random_seed() + start); - std::uniform_int_distribution udist; + nonstd::uniform_int_distribution udist; std::vector> trees; if (ci_group_size == 1) { diff --git a/core/src/sampling/RandomSampler.cpp b/core/src/sampling/RandomSampler.cpp index fb61dc2093..152a11b2cd 100644 --- a/core/src/sampling/RandomSampler.cpp +++ b/core/src/sampling/RandomSampler.cpp @@ -14,6 +14,7 @@ #include #include +#include #include "RandomSampler.h" @@ -52,7 +53,7 @@ void RandomSampler::subsample(const std::vector& samples, double sample_fraction, std::vector& subsamples) { std::vector shuffled_sample(samples); - std::shuffle(shuffled_sample.begin(), shuffled_sample.end(), random_number_generator); + nonstd::shuffle(shuffled_sample.begin(), shuffled_sample.end(), random_number_generator); uint subsample_size = (uint) std::ceil(samples.size() * sample_fraction); subsamples.resize(subsample_size); @@ -66,7 +67,7 @@ void RandomSampler::subsample(const std::vector& samples, std::vector& subsamples, std::vector& oob_samples) { std::vector 
shuffled_sample(samples); - std::shuffle(shuffled_sample.begin(), shuffled_sample.end(), random_number_generator); + nonstd::shuffle(shuffled_sample.begin(), shuffled_sample.end(), random_number_generator); size_t subsample_size = (size_t) std::ceil(samples.size() * sample_fraction); subsamples.resize(subsample_size); @@ -84,7 +85,7 @@ void RandomSampler::subsample_with_size(const std::vector& samples, size_t subsample_size, std::vector& subsamples) { std::vector shuffled_sample(samples); - std::shuffle(shuffled_sample.begin(), shuffled_sample.end(), random_number_generator); + nonstd::shuffle(shuffled_sample.begin(), shuffled_sample.end(), random_number_generator); subsamples.resize(subsample_size); std::copy(shuffled_sample.begin(), @@ -133,7 +134,7 @@ void RandomSampler::shuffle_and_split(std::vector& samples, // Fill with 0..n_all-1 and shuffle std::iota(samples.begin(), samples.end(), 0); - std::shuffle(samples.begin(), samples.end(), random_number_generator); + nonstd::shuffle(samples.begin(), samples.end(), random_number_generator); samples.resize(size); } @@ -159,7 +160,7 @@ void RandomSampler::draw_simple(std::vector& result, std::vector temp; temp.resize(max, false); - std::uniform_int_distribution unif_dist(0, max - 1 - skip.size()); + nonstd::uniform_int_distribution unif_dist(0, max - 1 - skip.size()); for (size_t i = 0; i < num_samples; ++i) { size_t draw; do { @@ -191,7 +192,7 @@ void RandomSampler::draw_fisher_yates(std::vector& result, // Draw without replacement using Fisher Yates algorithm for (size_t i = result.size() - 1; i > 0; --i) { - std::uniform_int_distribution distribution(0, i); + nonstd::uniform_int_distribution distribution(0, i); size_t j = distribution(random_number_generator); std::swap(result[i], result[j]); } @@ -210,7 +211,7 @@ void RandomSampler::draw_weighted(std::vector& result, std::vector temp; temp.resize(max + 1, false); - std::discrete_distribution<> weighted_dist(weights.begin(), weights.end()); + nonstd::discrete_distribution<> weighted_dist(weights.begin(), weights.end()); for (size_t i = 0; i < num_samples; ++i) { size_t draw; do { @@ -222,6 +223,6 @@ void RandomSampler::draw_weighted(std::vector& result, } size_t RandomSampler::sample_poisson(size_t mean) { - std::poisson_distribution distribution(mean); + nonstd::poisson_distribution distribution(mean); return distribution(random_number_generator); } diff --git a/core/src/sampling/RandomSampler.h b/core/src/sampling/RandomSampler.h index 8a492ed8b8..cf1736defd 100644 --- a/core/src/sampling/RandomSampler.h +++ b/core/src/sampling/RandomSampler.h @@ -22,6 +22,8 @@ #include "commons/globals.h" #include "commons/utility.h" #include "SamplingOptions.h" +#include "random/random.hpp" +#include "random/algorithm.hpp" #include #include diff --git a/core/src/tree/Tree.h b/core/src/tree/Tree.h index 2de4f58188..ea1a646864 100644 --- a/core/src/tree/Tree.h +++ b/core/src/tree/Tree.h @@ -19,7 +19,6 @@ #define GRF_TREE_H_ #include -#include #include #include "commons/globals.h" diff --git a/core/third_party/random/README.md b/core/third_party/random/README.md new file mode 100644 index 0000000000..65303352e0 --- /dev/null +++ b/core/third_party/random/README.md @@ -0,0 +1,73 @@ +# third_party/random + +## What are these files? + +They are copies of `random` and `algorithm` headers from the [llvm](https://github.com/llvm-mirror/libcxx/blob/master/include/) standard library. 
+
+
+## Motivation
+
+Users reported that random number generation was not stable across machines: the same seed could produce different results on different platforms. See issue [#379](https://github.com/grf-labs/grf/issues/379).
+
+As [pointed out](https://github.com/grf-labs/grf/issues/379#issuecomment-480641123) by @jtibshirani:
+
+> the mersenne twister has the same implementation across platforms, the other random methods may differ from compiler to compiler
+
+
+In PR [#469](https://github.com/grf-labs/grf/pull/469), @halflearned included this reduced copy of the relevant headers, ensuring random number generation is done in a consistent way across compilers.
+
+
+## How to reproduce
+
+#### File `random.hpp`
+
+Extract only the relevant classes and functions from the llvm `random` header:
+
++ `__independent_bits_engine`
++ `__clz`
++ `__log2`
++ `__log2_imp`
++ `__generate_canonical`
++ `uniform_int_distribution`
++ `poisson_distribution`
++ `uniform_real_distribution`
++ `normal_distribution`
++ `exponential_distribution`
++ `discrete_distribution`
+
+From each class, remove all methods associated with `operator<<`.
+
+Find and remove the following `_LIBCPP` macros:
++ `_LIBCPP_BEGIN_NAMESPACE_STD`
++ `_LIBCPP_CONSTEXPR`
++ `_LIBCPP_DISABLE_UBSAN_UNSIGNED_INTEGER_CHECK`
++ `_LIBCPP_END_NAMESPACE_STD`
++ `_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER`
++ `_LIBCPP_INLINE_VISIBILITY`
++ `_LIBCPP_MSVCRT`
++ `_LIBCPP_POP_MACROS`
++ `_LIBCPP_PUSH_MACROS`
++ `_LIBCPP_RANDOM`
++ `_LIBCPP_TEMPLATE_VIS`
++ `_LIBCPP_TYPE_VIS`
++ `_LIBCPP_USING_DEV_RANDOM`
+
+Find and replace the prefix:
++ `_VSTD::` -> `std::`
+
+Add `namespace nonstd`.
+
+
+#### File `algorithm.hpp`
+
+Include the modified `random.hpp`.
+
+Extract the relevant function:
+
++ `shuffle`
+
+Replace the prefix:
+
++ `std::uniform_int_distribution` -> `nonstd::uniform_int_distribution`
+
+Add `namespace nonstd`.
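+
+## Example
+
+A minimal usage sketch (illustration only, not part of the vendored headers): grf keeps seeding `std::mt19937_64`, whose output is fully specified by the C++ standard, and only swaps the distributions and `shuffle` for the `nonstd` versions, so the mapping from engine output to draws no longer depends on the compiler's standard library:
+
+```cpp
+#include <iostream>
+#include <random>
+#include <vector>
+
+#include "random/algorithm.hpp"
+#include "random/random.hpp"
+
+int main() {
+  // The engine itself is portable: mt19937_64 produces the same raw stream everywhere.
+  std::mt19937_64 engine(42);
+
+  // Portable draws in [0, 99]: the same sequence under libstdc++, libc++, and MSVC.
+  nonstd::uniform_int_distribution<size_t> unif(0, 99);
+  for (int i = 0; i < 5; ++i) {
+    std::cout << unif(engine) << " ";
+  }
+  std::cout << std::endl;
+
+  // Portable shuffle, built on the same uniform_int_distribution.
+  std::vector<int> v = {1, 2, 3, 4, 5};
+  nonstd::shuffle(v.begin(), v.end(), engine);
+  return 0;
+}
+```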
diff --git a/core/third_party/random/algorithm.hpp b/core/third_party/random/algorithm.hpp new file mode 100644 index 0000000000..d4b15e29c5 --- /dev/null +++ b/core/third_party/random/algorithm.hpp @@ -0,0 +1,35 @@ +#include +#include +#include +#include +#include "random/random.hpp" + +#ifndef _GRFSTD_ALGORITHM +#define _GRFSTD_ALGORITHM + +namespace nonstd { + + template + void shuffle(_RandomAccessIterator __first, _RandomAccessIterator __last, +#ifndef _LIBCPP_CXX03_LANG + _UniformRandomNumberGenerator &&__g) +#else + _UniformRandomNumberGenerator& __g) +#endif + { + typedef typename std::iterator_traits<_RandomAccessIterator>::difference_type difference_type; + typedef nonstd::uniform_int_distribution _Dp; + typedef typename _Dp::param_type _Pp; + difference_type __d = __last - __first; + if (__d > 1) { + _Dp __uid; + for (--__last, --__d; __first < __last; ++__first, --__d) { + difference_type __i = __uid(__g, _Pp(0, __d)); + if (__i != difference_type(0)) + std::swap(*__first, *(__first + __i)); + } + } + } +} + +#endif // _GRFSTD_ALGORITHM diff --git a/core/third_party/random/random.hpp b/core/third_party/random/random.hpp new file mode 100644 index 0000000000..52f5658842 --- /dev/null +++ b/core/third_party/random/random.hpp @@ -0,0 +1,1098 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef _LIBGRF_RANDOM +#define _LIBGRF_RANDOM + +namespace nonstd { + + +// Precondition: __x != 0 + inline + unsigned __clz(unsigned __x) { +#ifndef _LIBCPP_COMPILER_MSVC + return static_cast(__builtin_clz(__x)); +#else + static_assert(sizeof(unsigned) == sizeof(unsigned long), ""); + static_assert(sizeof(unsigned long) == 4, ""); + unsigned long where; + // Search from LSB to MSB for first set bit. + // Returns zero if no set bit is found. + if (_BitScanReverse(&where, __x)) + return 31 - where; + return 32; // Undefined Behavior. +#endif + } + + inline + unsigned long __clz(unsigned long __x) { +#ifndef _LIBCPP_COMPILER_MSVC + return static_cast(__builtin_clzl(__x)); +#else + static_assert(sizeof(unsigned) == sizeof(unsigned long), ""); + return __clz(static_cast(__x)); +#endif + } + + inline + unsigned long long __clz(unsigned long long __x) { +#ifndef _LIBCPP_COMPILER_MSVC + return static_cast(__builtin_clzll(__x)); +#else + unsigned long where; +// BitScanReverse scans from MSB to LSB for first set bit. +// Returns 0 if no set bit is found. +#if defined(_LIBCPP_HAS_BITSCAN64) + if (_BitScanReverse64(&where, __x)) + return static_cast(63 - where); +#else + // Scan the high 32 bits. + if (_BitScanReverse(&where, static_cast(__x >> 32))) + return 63 - (where + 32); // Create a bit offset from the MSB. + // Scan the low 32 bits. + if (_BitScanReverse(&where, static_cast(__x))) + return 63 - where; +#endif + return 64; // Undefined Behavior. +#endif // _LIBCPP_COMPILER_MSVC + } + + +// __independent_bits_engine + + template + struct __log2_imp { + static const size_t value = _Xp & ((unsigned long long) (1) << _Rp) ? 
_Rp + : __log2_imp<_Xp, _Rp - 1>::value; + }; + + template + struct __log2_imp<_Xp, 0> { + static const size_t value = 0; + }; + + template + struct __log2_imp<0, _Rp> { + static const size_t value = _Rp + 1; + }; + + template + struct __log2 { + static const size_t value = __log2_imp<_Xp, + sizeof(_UIntType) * __CHAR_BIT__ - 1>::value; + }; + + template + class __independent_bits_engine { + public: + // types + typedef _UIntType result_type; + + private: + typedef typename _Engine::result_type _Engine_result_type; + typedef typename std::conditional + < + sizeof(_Engine_result_type) <= sizeof(result_type), + result_type, + _Engine_result_type + >::type _Working_result_type; + + _Engine &__e_; + size_t __w_; + size_t __w0_; + size_t __n_; + size_t __n0_; + _Working_result_type __y0_; + _Working_result_type __y1_; + _Engine_result_type __mask0_; + _Engine_result_type __mask1_; + +#ifdef _LIBCPP_CXX03_LANG + static const _Working_result_type _Rp = _Engine::_Max - _Engine::_Min + + _Working_result_type(1); +#else + static const _Working_result_type _Rp = _Engine::max() - _Engine::min() + + _Working_result_type(1); +#endif + static const size_t __m = __log2<_Working_result_type, _Rp>::value; + static const size_t _WDt = std::numeric_limits<_Working_result_type>::digits; + static const size_t _EDt = std::numeric_limits<_Engine_result_type>::digits; + + public: + // constructors and seeding functions + __independent_bits_engine(_Engine &__e, size_t __w); + + // generating functions + result_type operator()() { return __eval(std::integral_constant()); } + + private: + result_type __eval(std::false_type); + + result_type __eval(std::true_type); + }; + + template + __independent_bits_engine<_Engine, _UIntType> + ::__independent_bits_engine(_Engine &__e, size_t __w) + : __e_(__e), + __w_(__w) { + __n_ = __w_ / __m + (__w_ % __m != 0); + __w0_ = __w_ / __n_; + if (_Rp == 0) + __y0_ = _Rp; + else if (__w0_ < _WDt) + __y0_ = (_Rp >> __w0_) << __w0_; + else + __y0_ = 0; + if (_Rp - __y0_ > __y0_ / __n_) { + ++__n_; + __w0_ = __w_ / __n_; + if (__w0_ < _WDt) + __y0_ = (_Rp >> __w0_) << __w0_; + else + __y0_ = 0; + } + __n0_ = __n_ - __w_ % __n_; + if (__w0_ < _WDt - 1) + __y1_ = (_Rp >> (__w0_ + 1)) << (__w0_ + 1); + else + __y1_ = 0; + __mask0_ = __w0_ > 0 ? _Engine_result_type(~0) >> (_EDt - __w0_) : + _Engine_result_type(0); + __mask1_ = __w0_ < _EDt - 1 ? 
+ _Engine_result_type(~0) >> (_EDt - (__w0_ + 1)) : + _Engine_result_type(~0); + } + + template + inline + _UIntType + __independent_bits_engine<_Engine, _UIntType>::__eval(std::false_type) { + return static_cast(__e_() & __mask0_); + } + + template + _UIntType + __independent_bits_engine<_Engine, _UIntType>::__eval(std::true_type) { + const size_t _WRt = std::numeric_limits::digits; + result_type _Sp = 0; + for (size_t __k = 0; __k < __n0_; ++__k) { + _Engine_result_type __u; + do { + __u = __e_() - _Engine::min(); + } while (__u >= __y0_); + if (__w0_ < _WRt) + _Sp <<= __w0_; + else + _Sp = 0; + _Sp += __u & __mask0_; + } + for (size_t __k = __n0_; __k < __n_; ++__k) { + _Engine_result_type __u; + do { + __u = __e_() - _Engine::min(); + } while (__u >= __y1_); + if (__w0_ < _WRt - 1) + _Sp <<= __w0_ + 1; + else + _Sp = 0; + _Sp += __u & __mask1_; + } + return _Sp; + } + + +// uniform_int_distribution + + template + class uniform_int_distribution { + public: + // types + typedef _IntType result_type; + + class param_type { + result_type __a_; + result_type __b_; + public: + typedef uniform_int_distribution distribution_type; + + explicit param_type(result_type __a = 0, + result_type __b = std::numeric_limits::max()) + : __a_(__a), __b_(__b) {} + + result_type a() const { return __a_; } + + result_type b() const { return __b_; } + + friend bool operator==(const param_type &__x, const param_type &__y) { + return __x.__a_ == __y.__a_ && __x.__b_ == __y.__b_; + } + + friend bool operator!=(const param_type &__x, const param_type &__y) { return !(__x == __y); } + }; + + private: + param_type __p_; + + public: + // constructors and reset functions + explicit uniform_int_distribution(result_type __a = 0, + result_type __b = std::numeric_limits::max()) + : __p_(param_type(__a, __b)) {} + + explicit uniform_int_distribution(const param_type &__p) : __p_(__p) {} + + void reset() {} + + // generating functions + template + result_type operator()(_URNG &__g) { return (*this)(__g, __p_); } + + template + result_type operator()(_URNG &__g, const param_type &__p); + + // property functions + result_type a() const { return __p_.a(); } + + result_type b() const { return __p_.b(); } + + param_type param() const { return __p_; } + + void param(const param_type &__p) { __p_ = __p; } + + result_type min() const { return a(); } + + result_type max() const { return b(); } + + friend bool operator==(const uniform_int_distribution &__x, + const uniform_int_distribution &__y) { return __x.__p_ == __y.__p_; } + + friend bool operator!=(const uniform_int_distribution &__x, + const uniform_int_distribution &__y) { return !(__x == __y); } + }; + + template + template + typename uniform_int_distribution<_IntType>::result_type + uniform_int_distribution<_IntType>::operator()(_URNG &__g, const param_type &__p) { + typedef typename std::conditional::type _UIntType; + const _UIntType _Rp = __p.b() - __p.a() + _UIntType(1); + if (_Rp == 1) + return __p.a(); + const size_t _Dt = std::numeric_limits<_UIntType>::digits; + typedef __independent_bits_engine<_URNG, _UIntType> _Eng; + if (_Rp == 0) + return static_cast(_Eng(__g, _Dt)()); + size_t __w = _Dt - __clz(_Rp) - 1; + if ((_Rp & (std::numeric_limits<_UIntType>::max() >> (_Dt - __w))) != 0) + ++__w; + _Eng __e(__g, __w); + _UIntType __u; + do { + __u = __e(); + } while (__u >= _Rp); + return static_cast(__u + __p.a()); + } + +#if _LIBCPP_STD_VER <= 14 || defined(_LIBCPP_ENABLE_CXX17_REMOVED_RANDOM_SHUFFLE) \ + || defined(_LIBCPP_BUILDING_LIBRARY) + + class __rs_default; + + 
__rs_default __rs_get(); + + class __rs_default { + static unsigned __c_; + + __rs_default(); + + public: + typedef uint_fast32_t result_type; + + static const result_type _Min = 0; + static const result_type _Max = 0xFFFFFFFF; + + __rs_default(const __rs_default &); + + ~__rs_default(); + + result_type operator()(); + + static result_type min() { return _Min; } + + static result_type max() { return _Max; } + + friend __rs_default __rs_get(); + }; + + __rs_default __rs_get(); + + template + void + random_shuffle(_RandomAccessIterator __first, _RandomAccessIterator __last) { + typedef typename std::iterator_traits<_RandomAccessIterator>::difference_type difference_type; + typedef uniform_int_distribution _Dp; + typedef typename _Dp::param_type _Pp; + difference_type __d = __last - __first; + if (__d > 1) { + _Dp __uid; + __rs_default __g = __rs_get(); + for (--__last, --__d; __first < __last; ++__first, --__d) { + difference_type __i = __uid(__g, _Pp(0, __d)); + if (__i != difference_type(0)) + swap(*__first, *(__first + __i)); + } + } + } + + template + void + random_shuffle(_RandomAccessIterator __first, _RandomAccessIterator __last, +#ifndef _LIBCPP_CXX03_LANG + _RandomNumberGenerator &&__rand) +#else + _RandomNumberGenerator& __rand) +#endif + { + typedef typename std::iterator_traits<_RandomAccessIterator>::difference_type difference_type; + difference_type __d = __last - __first; + if (__d > 1) { + for (--__last; __first < __last; ++__first, --__d) { + difference_type __i = __rand(__d); + swap(*__first, *(__first + __i)); + } + } + } + +#endif + + template + + _SampleIterator __sample(_PopulationIterator __first, + _PopulationIterator __last, _SampleIterator __output_iter, + _Distance __n, + _UniformRandomNumberGenerator &__g, + std::input_iterator_tag) { + + _Distance __k = 0; + for (; __first != __last && __k < __n; ++__first, (void) ++__k) + __output_iter[__k] = *__first; + _Distance __sz = __k; + for (; __first != __last; ++__first, (void) ++__k) { + _Distance __r = uniform_int_distribution<_Distance>(0, __k)(__g); + if (__r < __sz) + __output_iter[__r] = *__first; + } + return __output_iter + std::min(__n, __k); + } + + + +// generate_canonical + + template + _RealType + __generate_canonical(_URNG &__g) { + const size_t _Dt = std::numeric_limits<_RealType>::digits; + const size_t __b = _Dt < __bits ? 
_Dt : __bits; +#ifdef _LIBCPP_CXX03_LANG + const size_t __logR = __log2::value; +#else + const size_t __logR = __log2::value; +#endif + const size_t __k = __b / __logR + (__b % __logR != 0) + (__b == 0); + const _RealType _Rp = _URNG::max() - _URNG::min() + _RealType(1); + _RealType __base = _Rp; + _RealType _Sp = __g() - _URNG::min(); + for (size_t __i = 1; __i < __k; ++__i, __base *= _Rp) + _Sp += (__g() - _URNG::min()) * __base; + return _Sp / __base; + } + + + + +// poisson distribution + + template + class poisson_distribution { + public: + // types + typedef _IntType result_type; + + class param_type { + double __mean_; + double __s_; + double __d_; + double __l_; + double __omega_; + double __c0_; + double __c1_; + double __c2_; + double __c3_; + double __c_; + + public: + typedef poisson_distribution distribution_type; + + explicit param_type(double __mean = 1.0); + + + double mean() const { return __mean_; } + + friend + bool operator==(const param_type &__x, const param_type &__y) { return __x.__mean_ == __y.__mean_; } + + friend + bool operator!=(const param_type &__x, const param_type &__y) { return !(__x == __y); } + + friend class poisson_distribution; + }; + + private: + param_type __p_; + + public: + // constructors and reset functions + + explicit poisson_distribution(double __mean = 1.0) : __p_(__mean) {} + + + explicit poisson_distribution(const param_type &__p) : __p_(__p) {} + + + void reset() {} + + // generating functions + template + + result_type operator()(_URNG &__g) { return (*this)(__g, __p_); } + + template + result_type operator()(_URNG &__g, const param_type &__p); + + // property functions + + double mean() const { return __p_.mean(); } + + + param_type param() const { return __p_; } + + + void param(const param_type &__p) { __p_ = __p; } + + + result_type min() const { return 0; } + + + result_type max() const { return std::numeric_limits::max(); } + + friend + bool operator==(const poisson_distribution &__x, + const poisson_distribution &__y) { return __x.__p_ == __y.__p_; } + + friend + bool operator!=(const poisson_distribution &__x, + const poisson_distribution &__y) { return !(__x == __y); } + }; + +//#define std std::_LIBCPP_NAMESPACE + + template + poisson_distribution<_IntType>::param_type::param_type(double __mean) + : __mean_(__mean) { + if (__mean_ < 10) { + __s_ = 0; + __d_ = 0; + __l_ = std::exp(-__mean_); + __omega_ = 0; + __c3_ = 0; + __c2_ = 0; + __c1_ = 0; + __c0_ = 0; + __c_ = 0; + } else { + __s_ = std::sqrt(__mean_); + __d_ = 6 * __mean_ * __mean_; + __l_ = static_cast(__mean_ - 1.1484); + __omega_ = .3989423 / __s_; + double __b1_ = .4166667E-1 / __mean_; + double __b2_ = .3 * __b1_ * __b1_; + __c3_ = .1428571 * __b1_ * __b2_; + __c2_ = __b2_ - 15. * __c3_; + __c1_ = __b1_ - 6. * __b2_ + 45. * __c3_; + __c0_ = 1. - __b1_ + 3. * __b2_ - 15. 
* __c3_; + __c_ = .1069 / __mean_; + } + } + + +// uniform_real_distribution + + template + class uniform_real_distribution { + public: + // types + typedef _RealType result_type; + + class param_type { + result_type __a_; + result_type __b_; + public: + typedef uniform_real_distribution distribution_type; + + + explicit param_type(result_type __a = 0, + result_type __b = 1) + : __a_(__a), __b_(__b) {} + + + result_type a() const { return __a_; } + + + result_type b() const { return __b_; } + + friend + bool operator==(const param_type &__x, const param_type &__y) { + return __x.__a_ == __y.__a_ && __x.__b_ == __y.__b_; + } + + friend + bool operator!=(const param_type &__x, const param_type &__y) { return !(__x == __y); } + }; + + private: + param_type __p_; + + public: + // constructors and reset functions + + explicit uniform_real_distribution(result_type __a = 0, result_type __b = 1) + : __p_(param_type(__a, __b)) {} + + + explicit uniform_real_distribution(const param_type &__p) : __p_(__p) {} + + + void reset() {} + + // generating functions + template + + result_type operator()(_URNG &__g) { return (*this)(__g, __p_); } + + template + result_type operator()(_URNG &__g, const param_type &__p); + + // property functions + + result_type a() const { return __p_.a(); } + + + result_type b() const { return __p_.b(); } + + + param_type param() const { return __p_; } + + + void param(const param_type &__p) { __p_ = __p; } + + + result_type min() const { return a(); } + + + result_type max() const { return b(); } + + friend + bool operator==(const uniform_real_distribution &__x, + const uniform_real_distribution &__y) { return __x.__p_ == __y.__p_; } + + friend + bool operator!=(const uniform_real_distribution &__x, + const uniform_real_distribution &__y) { return !(__x == __y); } + }; + + template + template + inline + typename uniform_real_distribution<_RealType>::result_type + uniform_real_distribution<_RealType>::operator()(_URNG &__g, const param_type &__p) { + return (__p.b() - __p.a()) + * __generate_canonical<_RealType, std::numeric_limits<_RealType>::digits>(__g) + + __p.a(); + } + + +// normal_distribution + + template + class normal_distribution { + public: + // types + typedef _RealType result_type; + + class param_type { + result_type __mean_; + result_type __stddev_; + public: + typedef normal_distribution distribution_type; + + + explicit param_type(result_type __mean = 0, result_type __stddev = 1) + : __mean_(__mean), __stddev_(__stddev) {} + + + result_type mean() const { return __mean_; } + + + result_type stddev() const { return __stddev_; } + + friend + bool operator==(const param_type &__x, const param_type &__y) { + return __x.__mean_ == __y.__mean_ && __x.__stddev_ == __y.__stddev_; + } + + friend + bool operator!=(const param_type &__x, const param_type &__y) { return !(__x == __y); } + }; + + private: + param_type __p_; + result_type _V_; + bool _V_hot_; + + public: + // constructors and reset functions + + explicit normal_distribution(result_type __mean = 0, result_type __stddev = 1) + : __p_(param_type(__mean, __stddev)), _V_hot_(false) {} + + + explicit normal_distribution(const param_type &__p) + : __p_(__p), _V_hot_(false) {} + + + void reset() { _V_hot_ = false; } + + // generating functions + template + + result_type operator()(_URNG &__g) { return (*this)(__g, __p_); } + + template + result_type operator()(_URNG &__g, const param_type &__p); + + // property functions + + result_type mean() const { return __p_.mean(); } + + + result_type stddev() const { 
return __p_.stddev(); } + + + param_type param() const { return __p_; } + + + void param(const param_type &__p) { __p_ = __p; } + + + result_type min() const { return -std::numeric_limits::infinity(); } + + + result_type max() const { return std::numeric_limits::infinity(); } + + friend + bool operator==(const normal_distribution &__x, + const normal_distribution &__y) { + return __x.__p_ == __y.__p_ && __x._V_hot_ == __y._V_hot_ && + (!__x._V_hot_ || __x._V_ == __y._V_); + } + + friend + bool operator!=(const normal_distribution &__x, + const normal_distribution &__y) { return !(__x == __y); } + + }; + + template + template + _RealType + normal_distribution<_RealType>::operator()(_URNG &__g, const param_type &__p) { + result_type _Up; + if (_V_hot_) { + _V_hot_ = false; + _Up = _V_; + } else { + uniform_real_distribution _Uni(-1, 1); + result_type __u; + result_type __v; + result_type __s; + do { + __u = _Uni(__g); + __v = _Uni(__g); + __s = __u * __u + __v * __v; + } while (__s > 1 || __s == 0); + result_type _Fp = std::sqrt(-2 * std::log(__s) / __s); + _V_ = __v * _Fp; + _V_hot_ = true; + _Up = __u * _Fp; + } + return _Up * __p.stddev() + __p.mean(); + } + + + + +// exponential_distribution + + template + class exponential_distribution { + public: + // types + typedef _RealType result_type; + + class param_type { + result_type __lambda_; + public: + typedef exponential_distribution distribution_type; + + + explicit param_type(result_type __lambda = 1) : __lambda_(__lambda) {} + + + result_type lambda() const { return __lambda_; } + + friend + bool operator==(const param_type &__x, const param_type &__y) { return __x.__lambda_ == __y.__lambda_; } + + friend + bool operator!=(const param_type &__x, const param_type &__y) { return !(__x == __y); } + }; + + private: + param_type __p_; + + public: + // constructors and reset functions + + explicit exponential_distribution(result_type __lambda = 1) + : __p_(param_type(__lambda)) {} + + + explicit exponential_distribution(const param_type &__p) : __p_(__p) {} + + + void reset() {} + + // generating functions + template + + result_type operator()(_URNG &__g) { return (*this)(__g, __p_); } + + template + result_type operator()(_URNG &__g, const param_type &__p); + + // property functions + + result_type lambda() const { return __p_.lambda(); } + + + param_type param() const { return __p_; } + + + void param(const param_type &__p) { __p_ = __p; } + + + result_type min() const { return 0; } + + + result_type max() const { return std::numeric_limits::infinity(); } + + friend + bool operator==(const exponential_distribution &__x, + const exponential_distribution &__y) { return __x.__p_ == __y.__p_; } + + friend + bool operator!=(const exponential_distribution &__x, + const exponential_distribution &__y) { return !(__x == __y); } + }; + + template + template + _RealType + exponential_distribution<_RealType>::operator()(_URNG &__g, const param_type &__p) { + return -std::log + ( + result_type(1) - + __generate_canonical::digits>(__g) + ) + / __p.lambda(); + } + + +// poisson distribution + + template + template + _IntType + poisson_distribution<_IntType>::operator()(_URNG &__urng, const param_type &__pr) { + result_type __x; + uniform_real_distribution __urd; + if (__pr.__mean_ < 10) { + __x = 0; + for (double __p = __urd(__urng); __p > __pr.__l_; ++__x) + __p *= __urd(__urng); + } else { + double __difmuk; + double __g = __pr.__mean_ + __pr.__s_ * normal_distribution()(__urng); + double __u; + if (__g > 0) { + __x = static_cast(__g); + if (__x >= 
__pr.__l_) + return __x; + __difmuk = __pr.__mean_ - __x; + __u = __urd(__urng); + if (__pr.__d_ * __u >= __difmuk * __difmuk * __difmuk) + return __x; + } + exponential_distribution __edist; + for (bool __using_exp_dist = false; true; __using_exp_dist = true) { + double __e; + if (__using_exp_dist || __g < 0) { + double __t; + do { + __e = __edist(__urng); + __u = __urd(__urng); + __u += __u - 1; + __t = 1.8 + (__u < 0 ? -__e : __e); + } while (__t <= -.6744); + __x = __pr.__mean_ + __pr.__s_ * __t; + __difmuk = __pr.__mean_ - __x; + __using_exp_dist = true; + } + double __px; + double __py; + if (__x < 10) { + const double __fac[] = {1, 1, 2, 6, 24, 120, 720, 5040, + 40320, 362880}; + __px = -__pr.__mean_; + __py = std::pow(__pr.__mean_, (double) __x) / __fac[__x]; + } else { + double __del = .8333333E-1 / __x; + __del -= 4.8 * __del * __del * __del; + double __v = __difmuk / __x; + if (std::abs(__v) > 0.25) + __px = __x * std::log(1 + __v) - __difmuk - __del; + else + __px = __x * __v * __v * (((((((.1250060 * __v + -.1384794) * + __v + .1421878) * __v + -.1661269) * __v + .2000118) * + __v + -.2500068) * __v + .3333333) * __v + -.5) - __del; + __py = .3989423 / std::sqrt(__x); + } + double __r = (0.5 - __difmuk) / __pr.__s_; + double __r2 = __r * __r; + double __fx = -0.5 * __r2; + double __fy = __pr.__omega_ * (((__pr.__c3_ * __r2 + __pr.__c2_) * + __r2 + __pr.__c1_) * __r2 + __pr.__c0_); + if (__using_exp_dist) { + if (__pr.__c_ * std::abs(__u) <= __py * std::exp(__px + __e) - + __fy * std::exp(__fx + __e)) + break; + } else { + if (__fy - __u * __fy <= __py * std::exp(__px - __fx)) + break; + } + } + } + return __x; + } + + +// discrete_distribution + + template + class discrete_distribution { + public: + // types + typedef _IntType result_type; + + class param_type { + std::vector __p_; + public: + typedef discrete_distribution distribution_type; + + + param_type() {} + + template + + param_type(_InputIterator __f, _InputIterator __l) + : __p_(__f, __l) { __init(); } + +#ifndef _LIBCPP_CXX03_LANG + + + param_type(std::initializer_list __wl) + : __p_(__wl.begin(), __wl.end()) { __init(); } + +#endif // _LIBCPP_CXX03_LANG + + template + param_type(size_t __nw, double __xmin, double __xmax, + _UnaryOperation __fw); + + std::vector probabilities() const; + + friend + bool operator==(const param_type &__x, const param_type &__y) { return __x.__p_ == __y.__p_; } + + friend + bool operator!=(const param_type &__x, const param_type &__y) { return !(__x == __y); } + + private: + void __init(); + + friend class discrete_distribution; + + }; + + private: + param_type __p_; + + public: + // constructor and reset functions + + discrete_distribution() {} + + template + + discrete_distribution(_InputIterator __f, _InputIterator __l) + : __p_(__f, __l) {} + +#ifndef _LIBCPP_CXX03_LANG + + + discrete_distribution(std::initializer_list __wl) + : __p_(__wl) {} + +#endif // _LIBCPP_CXX03_LANG + + template + + discrete_distribution(size_t __nw, double __xmin, double __xmax, + _UnaryOperation __fw) + : __p_(__nw, __xmin, __xmax, __fw) {} + + + explicit discrete_distribution(const param_type &__p) + : __p_(__p) {} + + + void reset() {} + + // generating functions + template + + result_type operator()(_URNG &__g) { return (*this)(__g, __p_); } + + template + result_type operator()(_URNG &__g, const param_type &__p); + + // property functions + + std::vector probabilities() const { return __p_.probabilities(); } + + + param_type param() const { return __p_; } + + + void param(const param_type &__p) { __p_ 
= __p; }
+
+        result_type min() const { return 0; }
+
+        result_type max() const { return __p_.__p_.size(); }
+
+        friend
+        bool operator==(const discrete_distribution &__x,
+                        const discrete_distribution &__y) { return __x.__p_ == __y.__p_; }
+
+        friend
+        bool operator!=(const discrete_distribution &__x,
+                        const discrete_distribution &__y) { return !(__x == __y); }
+
+    };
+
+    template
+    template
+    discrete_distribution<_IntType>::param_type::param_type(size_t __nw,
+                                                            double __xmin,
+                                                            double __xmax,
+                                                            _UnaryOperation __fw) {
+        if (__nw > 1) {
+            __p_.reserve(__nw - 1);
+            double __d = (__xmax - __xmin) / __nw;
+            double __d2 = __d / 2;
+            for (size_t __k = 0; __k < __nw; ++__k)
+                __p_.push_back(__fw(__xmin + __k * __d + __d2));
+            __init();
+        }
+    }
+
+    template
+    void
+    discrete_distribution<_IntType>::param_type::__init() {
+        if (!__p_.empty()) {
+            if (__p_.size() > 1) {
+                double __s = std::accumulate(__p_.begin(), __p_.end(), 0.0);
+                for (std::vector::iterator __i = __p_.begin(), __e = __p_.end();
+                     __i < __e; ++__i)
+                    *__i /= __s;
+                std::vector __t(__p_.size() - 1);
+                std::partial_sum(__p_.begin(), __p_.end() - 1, __t.begin());
+                swap(__p_, __t);
+            } else {
+                __p_.clear();
+                __p_.shrink_to_fit();
+            }
+        }
+    }
+
+    template
+    std::vector
+    discrete_distribution<_IntType>::param_type::probabilities() const {
+        size_t __n = __p_.size();
+        std::vector __p(__n + 1);
+        std::adjacent_difference(__p_.begin(), __p_.end(), __p.begin());
+        if (__n > 0)
+            __p[__n] = 1 - __p_[__n - 1];
+        else
+            __p[0] = 1;
+        return __p;
+    }
+
+    template
+    template
+    _IntType
+    discrete_distribution<_IntType>::operator()(_URNG &__g, const param_type &__p) {
+        uniform_real_distribution __gen;
+        return static_cast<_IntType>(
+                std::upper_bound(__p.__p_.begin(), __p.__p_.end(), __gen(__g)) -
+                __p.__p_.begin());
+    }
+
+} // namespace nonstd
+#endif // _LIBGRF_RANDOM
diff --git a/r-package/build_package.R b/r-package/build_package.R
index 323788b42b..11c35182c2 100755
--- a/r-package/build_package.R
+++ b/r-package/build_package.R
@@ -11,24 +11,9 @@
 library(Rcpp)
 library(devtools)
 library(testthat)
 library(roxygen2)
-library(lintr)
 
 package.name <- "grf"
 
-# Check code style consistency
-linters <- with_defaults(
-  line_length_linter(120), # Max line length = 120
-  object_name_linter = NULL, # To allow variable.names + function_names
-  commented_code_linter = NULL, # Misc. false positives
-  object_usage_linter = NULL, # Misc. false positives
-  spaces_left_parentheses_linter = NULL, # Misc. false positives
-  default = default_linters);
-
-lint_res <- lint_package(path = package.name, linters = linters, exclusions = file.path(package.name, "R/RcppExports.R"))
-lint_res
-
-if (length(lint_res) > 0) quit(status = 1)
-
 # If built for CRAN, exclude all tests except ones with "cran" in the filename
 # by adding the following regex to .Rbuildignore.
 if (!is.na(args[1]) && args[1] == "--as-cran") {
diff --git a/r-package/grf/R/causal_tuning.R b/r-package/grf/R/causal_tuning.R
index 3e4c17c1a6..3d1993a124 100644
--- a/r-package/grf/R/causal_tuning.R
+++ b/r-package/grf/R/causal_tuning.R
@@ -84,7 +84,7 @@
 #' @export
 tune_causal_forest <- function(X, Y, W, Y.hat, W.hat,
                                sample.weights = NULL,
-                               num.fit.trees = 200,
+                               num.fit.trees = 100,
                                num.fit.reps = 50,
                                num.optimize.reps = 1000,
                                min.node.size = NULL,
@@ -123,6 +123,14 @@ tune_causal_forest <- function(X, Y, W, Y.hat, W.hat,
   all.params <- get_initial_params(min.node.size, sample.fraction, mtry, alpha, imbalance.penalty)
   fixed.params <- all.params[!is.na(all.params)]
   tuning.params <- all.params[is.na(all.params)]
+  default.params <- c(
+    min.node.size = validate_min_node_size(min.node.size),
+    sample.fraction = validate_sample_fraction(sample.fraction),
+    mtry = validate_mtry(mtry, X),
+    alpha = validate_alpha(alpha),
+    imbalance.penalty = validate_imbalance_penalty(imbalance.penalty)
+  )
+  default.params[!is.na(all.params)] <- all.params[!is.na(all.params)]
 
   if (length(tuning.params) == 0) {
     return(list("error" = NA, "params" = c(all.params)))
@@ -134,7 +142,7 @@
   colnames(fit.draws) <- names(tuning.params)
 
   compute.oob.predictions <- TRUE
-  debiased.errors <- apply(fit.draws, 1, function(draw) {
+  small.forest.errors <- apply(fit.draws, 1, function(draw) {
     params <- c(fixed.params, get_params_from_draw(X, draw))
     small.forest <- causal_train(
       data$default, data$sparse,
@@ -165,35 +173,147 @@
     mean(prediction$debiased.error, na.rm = TRUE)
   })
 
+  if (any(is.na(small.forest.errors))) {
+    warning(paste0(
+      "Could not tune causal forest because some small forest error estimates were NA.\n",
+      "Consider increasing tuning argument num.fit.trees."
+    ))
+    out <- get_tuning_output(params = c(default.params), status = "failure")
+    return(out)
+  }
+
+  if (sd(small.forest.errors) / mean(small.forest.errors) < 1e-10) {
+    warning(paste0(
+      "Could not tune causal forest because small forest errors were nearly constant.\n",
+      "Consider increasing tuning argument num.fit.trees."
+    ))
+    out <- get_tuning_output(params = c(default.params), status = "failure")
+    return(out)
+  }
+
   # Fit the 'dice kriging' model to these error estimates.
   # Note that in the 'km' call, the kriging package prints a large amount of information
   # about the fitting process. Here, capture its console output and discard it.
-  variance.guess <- rep(var(debiased.errors) / 2, nrow(fit.draws))
-  env <- new.env()
-  capture.output(env$kriging.model <-
-                   DiceKriging::km(
-                     design = data.frame(fit.draws),
-                     response = debiased.errors,
-                     noise.var = variance.guess
-                   ))
-  kriging.model <- env$kriging.model
+  variance.guess <- rep(var(small.forest.errors) / 2, nrow(fit.draws))
+  kriging.model <- tryCatch(
+    {
+      capture.output(
+        model <- DiceKriging::km(
+          design = data.frame(fit.draws),
+          response = small.forest.errors,
+          noise.var = variance.guess
+        )
+      )
+      model
+    },
+    error = function(e) {
+      warning(paste0("DiceKriging threw the following error during causal tuning: \n", e))
+      return(NULL)
+    }
+  )
+  if (is.null(kriging.model)) {
+    warning("Tuning was attempted but failed. Reverting to default parameters.")
+    out <- get_tuning_output(params = default.params, status = "failure")
+    return(out)
+  }
 
   # To determine the optimal parameter values, predict using the kriging model at a large
   # number of random values, then select those that produced the lowest error.
   optimize.draws <- matrix(runif(num.optimize.reps * num.params), num.optimize.reps, num.params)
   colnames(optimize.draws) <- names(tuning.params)
-  model.surface <- predict(kriging.model, newdata = data.frame(optimize.draws), type = "SK")
-
+  model.surface <- predict(kriging.model, newdata = data.frame(optimize.draws), type = "SK")$mean
   tuned.params <- get_params_from_draw(X, optimize.draws)
-  grid <- cbind(error = model.surface$mean, tuned.params)
-  optimal.draw <- which.min(grid[, "error"])
-  optimal.param <- grid[optimal.draw, ]
-  out <- list(
-    error = optimal.param[1], params = c(fixed.params, optimal.param[-1]),
-    grid = grid
+  grid <- cbind(error = c(model.surface), tuned.params)
+  small.forest.optimal.draw <- which.min(grid[, "error"])
+  small.forest.optimal.params <- grid[small.forest.optimal.draw, -1]
+
+  # To avoid the possibility of selection bias, re-train a moderately-sized forest
+  # at the value chosen by the method above
+  retrained.forest.params <- c(fixed.params, grid[small.forest.optimal.draw, -1])
+  retrained.forest.num.trees <- num.fit.trees * 4
+  retrained.forest <- causal_train(
+    data$default, data$sparse,
+    outcome.index, treatment.index, sample.weight.index,
+    !is.null(sample.weights),
+    retrained.forest.params["mtry"],
+    retrained.forest.num.trees,
+    retrained.forest.params["min.node.size"],
+    retrained.forest.params["sample.fraction"],
+    honesty,
+    coerce_honesty_fraction(honesty.fraction),
+    prune.empty.leaves,
+    ci.group.size,
+    reduced.form.weight,
+    retrained.forest.params["alpha"],
+    retrained.forest.params["imbalance.penalty"],
+    stabilize.splits,
+    clusters,
+    samples.per.cluster,
+    compute.oob.predictions,
+    num.threads,
+    seed
+  )
+
+  retrained.forest.prediction <- causal_predict_oob(
+    retrained.forest, data$default, data$sparse,
+    outcome.index, treatment.index, num.threads, FALSE
   )
-  class(out) <- c("tuning_output")
+
+  retrained.forest.error <- mean(retrained.forest.prediction$debiased.error, na.rm = TRUE)
+
+  # Train a forest with default parameters, and check its predicted error.
+  # This improves our chances of not doing worse than default
+  num.default.forest.trees <- num.fit.trees * 4
+
+  default.forest <- causal_train(
+    data$default, data$sparse,
+    outcome.index, treatment.index, sample.weight.index,
+    !is.null(sample.weights),
+    default.params["mtry"],
+    num.default.forest.trees,
+    default.params["min.node.size"],
+    default.params["sample.fraction"],
+    honesty,
+    coerce_honesty_fraction(honesty.fraction),
+    prune.empty.leaves,
+    ci.group.size,
+    reduced.form.weight,
+    default.params["alpha"],
+    default.params["imbalance.penalty"],
+    stabilize.splits,
+    clusters,
+    samples.per.cluster,
+    compute.oob.predictions,
+    num.threads,
+    seed
+  )
+
+  default.forest.prediction <- causal_predict_oob(
+    default.forest, data$default, data$sparse,
+    outcome.index, treatment.index, num.threads, FALSE
+  )
+
+  default.forest.error <- mean(default.forest.prediction$debiased.error, na.rm = TRUE)
+
+  # Now compare predicted errors: default parameters vs retrained parameters
+  if (default.forest.error < retrained.forest.error) {
+    out <- get_tuning_output(
+      error = default.forest.error,
+      params = default.params,
+      grid = NA,
+      status = "default"
+    )
+  } else {
+    out <- get_tuning_output(
+      error = retrained.forest.error,
+      params = retrained.forest.params,
+      grid = grid,
+      status = "tuned"
+    )
+  }
 
   out
 }
diff --git a/r-package/grf/R/plot.R b/r-package/grf/R/plot.R
index ff05fd9977..6e9ea89073 100644
--- a/r-package/grf/R/plot.R
+++ b/r-package/grf/R/plot.R
@@ -3,7 +3,7 @@
 #' If it is a non-leaf node: show its splitting variable and splitting value
 #' @param tree the tree to convert
 #' @param index the index of the current node
-#' @keyword internal
+#' @keywords internal
 create_dot_body <- function(tree, index = 1) {
   node <- tree$nodes[[index]]
@@ -68,7 +68,7 @@ size = ", num_samples, '"];')
 #' This function generates a GraphViz representation of the tree,
 #' which is then written into `dot_string`.
 #' @param tree the tree to convert
-#' @keyword internal
+#' @keywords internal
 export_graphviz <- function(tree) {
   header <- "digraph nodes { \n node [shape=box] ;"
   footer <- "}"
diff --git a/r-package/grf/R/print.R b/r-package/grf/R/print.R
index 949a9a085e..2ab3851000 100644
--- a/r-package/grf/R/print.R
+++ b/r-package/grf/R/print.R
@@ -94,30 +94,47 @@ print.boosted_regression_forest <- function(x, ...) {
 #' @importFrom stats aggregate quantile
 #' @export
 print.tuning_output <- function(x, tuning.quantiles = seq(0, 1, 0.2), ...) {
-  grid <- x$grid
-  out <- lapply(colnames(grid)[-1], function(name) {
-    q <- quantile(grid[, name], probs = tuning.quantiles)
-    # Cannot form for example quintiles for mtry if the number of variables is
-    # less than 5, so here we just truncate the groups.
-    if (length(unique(q) < length(q))) {
-      q <- unique(q)
-    }
-    rank <- cut(grid[, name], q, include.lowest = TRUE)
-    out <- aggregate(grid[, "error"], by = list(rank), FUN = mean)
-    colnames(out) <- c(name, "error")
-    out
-  })
-
-  err <- x$error
-  params <- x$params[colnames(grid)[-1]]
-  opt <- formatC(c(err, params))
-
-  cat("Optimal tuning parameters: \n")
-  cat(paste0(names(opt), ": ", opt, "\n"))
-
-  cat("Average error by ", length(tuning.quantiles) - 1, "-quantile:\n", sep = "")
-  for (i in out) {
+  if (x$status == "failure") {
+    cat("Tuning status: failure.\n")
+    cat("This indicates tuning was attempted but failed due to an error, and we fell back to default parameters: \n\n")
+    params <- x$params
+    cat(paste0(names(params), ": ", params, "\n"))
+  } else if (x$status == "default") {
+    cat("Tuning status: default.\n")
+    cat("This indicates tuning was attempted. ")
+    cat("However, we could not find parameters that were expected to perform better than default: \n\n")
+    params <- x$params
+    cat(paste0(names(params), ": ", params, "\n"))
+  } else if (x$status == "tuned") {
+    cat("Tuning status: tuned.\n")
+    cat("This indicates tuning found parameters that are expected to perform better than default. \n\n")
+    grid <- x$grid
+    out <- lapply(colnames(grid)[-1], function(name) {
+      q <- quantile(grid[, name], probs = tuning.quantiles)
+      # Cannot form for example quintiles for mtry if the number of variables is
+      # less than 5, so here we just truncate the groups.
+      if (length(unique(q)) < length(q)) {
+        q <- unique(q)
+      }
+      rank <- cut(grid[, name], q, include.lowest = TRUE)
+      out <- aggregate(grid[, "error"], by = list(rank), FUN = mean)
+      colnames(out) <- c(name, "error")
+      out
+    })
+
+    cat(paste0("Predicted debiased error: ", x$error, "\n\n"))
+
+    cat("Tuned parameters: \n")
+    cat(paste0(names(x$params), ": ", x$params, "\n"))
     cat("\n")
-    print(i, row.names = FALSE)
+
+    cat("Average error by ", length(tuning.quantiles) - 1, "-quantile:\n", sep = "")
+    for (i in out) {
+      cat("\n")
+      print(i, row.names = FALSE)
+    }
+  } else {
+    stop(paste0("Error while reading tuning output. ",
", + "Parameter 'status' must be one of 'failure', 'default', or 'tuned'")) } } diff --git a/r-package/grf/R/regression_tuning.R b/r-package/grf/R/regression_tuning.R index bc1dc61f91..5b8b3a61e8 100644 --- a/r-package/grf/R/regression_tuning.R +++ b/r-package/grf/R/regression_tuning.R @@ -109,6 +109,14 @@ tune_regression_forest <- function(X, Y, fixed.params <- all.params[!is.na(all.params)] tuning.params <- all.params[is.na(all.params)] + default.params <- c( + min.node.size = validate_min_node_size(min.node.size), + sample.fraction = validate_sample_fraction(sample.fraction), + mtry = validate_mtry(mtry, X), + alpha = validate_alpha(alpha), + imbalance.penalty = validate_imbalance_penalty(imbalance.penalty) + ) + default.params[!is.na(all.params)] <- all.params[!is.na(all.params)] if (length(tuning.params) == 0) { return(list("error" = NA, "params" = c(all.params))) @@ -120,7 +128,7 @@ tune_regression_forest <- function(X, Y, colnames(fit.draws) <- names(tuning.params) compute.oob.predictions <- TRUE - debiased.errors <- apply(fit.draws, 1, function(draw) { + small.forest.errors <- apply(fit.draws, 1, function(draw) { params <- c(fixed.params, get_params_from_draw(X, draw)) small.forest <- regression_train( data$default, data$sparse, outcome.index, sample.weight.index, @@ -150,35 +158,139 @@ tune_regression_forest <- function(X, Y, mean(error, na.rm = TRUE) }) + if (any(is.na(small.forest.errors))) { + warning(paste0( + "Could not tune causal forest because all small forest error estimates were NA.\n", + "Consider increasing tuning argument num.fit.trees." + )) + out <- get_tuning_output(params = c(all.params), status = "failure") + return(out) + } + + if (sd(small.forest.errors) / mean(small.forest.errors) < 1e-10) { + warning(paste0( + "Could not tune causal forest because small forest errors were nearly constant.\n", + "Consider increasing argument num.fit.trees." + )) + out <- get_tuning_output(params = c(all.params), status = "failure") + return(out) + } + # Fit the 'dice kriging' model to these error estimates. # Note that in the 'km' call, the kriging package prints a large amount of information - # about the fitting process. Here, capture its console output and discard it. - variance.guess <- rep(var(debiased.errors) / 2, nrow(fit.draws)) - env <- new.env() - capture.output(env$kriging.model <- - DiceKriging::km( - design = data.frame(fit.draws), - response = debiased.errors, - noise.var = variance.guess - )) - kriging.model <- env$kriging.model + # about the fitting process. Heres, capture its console output and discard it. + variance.guess <- rep(var(small.forest.errors) / 2, nrow(fit.draws)) + kriging.model <- tryCatch( + { + capture.output( + model <- DiceKriging::km( + design = data.frame(fit.draws), + response = small.forest.errors, + noise.var = variance.guess + ) + ) + model + }, + error = function(e) { + warning(paste0("Dicekriging threw the following error during regression tuning: \n", e)) + return(NULL) + } + ) + if (is.null(kriging.model)) { + warning("Tuning was attempted but failed. Reverting to default parameters.") + out <- get_tuning_output(params = default.parameters, status = "failure") + return(out) + } # To determine the optimal parameter values, predict using the kriging model at a large # number of random values, then select those that produced the lowest error. 
   optimize.draws <- matrix(runif(num.optimize.reps * num.params), num.optimize.reps, num.params)
   colnames(optimize.draws) <- names(tuning.params)
-  model.surface <- predict(kriging.model, newdata = data.frame(optimize.draws), type = "SK")
-
+  model.surface <- predict(kriging.model, newdata = data.frame(optimize.draws), type = "SK")$mean
   tuned.params <- get_params_from_draw(X, optimize.draws)
-  grid <- cbind(error = model.surface$mean, tuned.params)
-  optimal.draw <- which.min(grid[, "error"])
-  optimal.param <- grid[optimal.draw, ]
-  out <- list(
-    error = optimal.param[1], params = c(fixed.params, optimal.param[-1]),
-    grid = grid
+  grid <- cbind(error = c(model.surface), tuned.params)
+  small.forest.optimal.draw <- which.min(grid[, "error"])
+  small.forest.optimal.params <- grid[small.forest.optimal.draw, -1]
+
+  # To avoid the possibility of selection bias, re-train a moderately-sized forest
+  # at the value chosen by the method above
+  retrained.forest.params <- c(fixed.params, grid[small.forest.optimal.draw, -1])
+  retrained.forest.num.trees <- num.fit.trees * 4
+  retrained.forest <- regression_train(
+    data$default, data$sparse, outcome.index, sample.weight.index,
+    !is.null(sample.weights),
+    as.numeric(retrained.forest.params["mtry"]),
+    retrained.forest.num.trees,
+    as.numeric(retrained.forest.params["min.node.size"]),
+    as.numeric(retrained.forest.params["sample.fraction"]),
+    honesty,
+    coerce_honesty_fraction(honesty.fraction),
+    prune.empty.leaves,
+    ci.group.size,
+    as.numeric(retrained.forest.params["alpha"]),
+    as.numeric(retrained.forest.params["imbalance.penalty"]),
+    clusters,
+    samples.per.cluster,
+    compute.oob.predictions,
+    num.threads,
+    seed
+  )
+
+  retrained.forest.prediction <- regression_predict_oob(
+    retrained.forest, data$default, data$sparse,
+    outcome.index, num.threads, FALSE
   )
-  class(out) <- c("tuning_output")
+
+  retrained.forest.error <- mean(retrained.forest.prediction$debiased.error, na.rm = TRUE)
+
+  # Train a forest with default parameters, and check its predicted error.
+  # This improves our chances of not doing worse than default
+  num.default.forest.trees <- num.fit.trees * 4
+
+  default.forest <- regression_train(
+    data$default, data$sparse, outcome.index, sample.weight.index,
+    !is.null(sample.weights),
+    as.numeric(default.params["mtry"]),
+    num.default.forest.trees,
+    as.numeric(default.params["min.node.size"]),
+    as.numeric(default.params["sample.fraction"]),
+    honesty,
+    coerce_honesty_fraction(honesty.fraction),
+    prune.empty.leaves,
+    ci.group.size,
+    as.numeric(default.params["alpha"]),
+    as.numeric(default.params["imbalance.penalty"]),
+    clusters,
+    samples.per.cluster,
+    compute.oob.predictions,
+    num.threads,
+    seed
+  )
+
+  default.forest.prediction <- regression_predict_oob(
+    default.forest, data$default, data$sparse,
+    outcome.index, num.threads, FALSE
+  )
+
+  default.forest.error <- mean(default.forest.prediction$debiased.error, na.rm = TRUE)
+
+  # Now compare predicted errors: default parameters vs retrained parameters
+  if (default.forest.error < retrained.forest.error) {
+    out <- get_tuning_output(
+      error = default.forest.error,
+      params = default.params,
+      grid = NA,
+      status = "default"
+    )
+  } else {
+    out <- get_tuning_output(
+      error = retrained.forest.error,
+      params = retrained.forest.params,
+      grid = grid,
+      status = "tuned"
+    )
+  }
 
   out
 }
diff --git a/r-package/grf/R/tuning_utilities.R b/r-package/grf/R/tuning_utilities.R
index da0949a228..a84a3793dc 100644
--- a/r-package/grf/R/tuning_utilities.R
+++ b/r-package/grf/R/tuning_utilities.R
@@ -33,3 +33,9 @@ get_params_from_draw <- function(X, draws) {
     }
   }, FUN.VALUE = numeric(n))
 }
+
+get_tuning_output <- function(status, params, error = NA, grid = NA) {
+  out <- list(status = status, params = params, error = error, grid = grid)
+  class(out) <- c("tuning_output")
+  out
+}
diff --git a/r-package/grf/src/random b/r-package/grf/src/random
new file mode 120000
index 0000000000..7c66ed57ec
--- /dev/null
+++ b/r-package/grf/src/random
@@ -0,0 +1 @@
+../../../core/third_party/random
\ No newline at end of file
diff --git a/r-package/grf/tests/testthat/test_regression_forest.R b/r-package/grf/tests/testthat/test_regression_forest.R
index cc2e6fbe09..ef2751afcc 100755
--- a/r-package/grf/tests/testthat/test_regression_forest.R
+++ b/r-package/grf/tests/testthat/test_regression_forest.R
@@ -221,8 +221,8 @@ test_that("a non-pruned honest regression forest has lower MSE than a pruned hon
   X <- matrix(rnorm(n * p), n, p)
   Y <- abs(X[, 1]) + 0.1 * rnorm(n)
 
-  f1 <- regression_forest(X, Y, honesty = TRUE, honesty.fraction = 0.9, prune = TRUE)
-  f2 <- regression_forest(X, Y, honesty = TRUE, honesty.fraction = 0.9, prune = FALSE)
+  f1 <- regression_forest(X, Y, honesty = TRUE, honesty.fraction = 0.9, prune.empty.leaves = TRUE)
+  f2 <- regression_forest(X, Y, honesty = TRUE, honesty.fraction = 0.9, prune.empty.leaves = FALSE)
 
   mse.pruned <- mean((predict(f1)$predictions - Y)^2)
   mse.notpruned <- mean((predict(f2)$predictions - Y)^2)
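For context, a short sketch of how the reworked tuning flow surfaces its outcome (hypothetical session; assumes the package is built from this branch). Tuning now always returns a `tuning_output` object whose `status` field is `"tuned"`, `"default"`, or `"failure"`, and `print.tuning_output` dispatches on it:

```r
library(grf)

n <- 400; p <- 5
X <- matrix(rnorm(n * p), n, p)
W <- rbinom(n, 1, 0.5)
Y <- pmax(X[, 1], 0) * W + X[, 2] + rnorm(n)

# Nuisance estimates: out-of-bag Y.hat, constant W.hat under randomization.
Y.hat <- predict(regression_forest(X, Y))$predictions
W.hat <- rep(0.5, n)

tuned <- tune_causal_forest(X, Y, W, Y.hat, W.hat)
tuned$status  # "tuned", "default", or "failure"
print(tuned)  # dispatches to the new print.tuning_output method
```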