Skip to content

Commit

Permalink
Merge branch 'master' into hsim
Browse files Browse the repository at this point in the history
* master:
  Fix a typo in the `@keywords` roxygen directive. (grf-labs#481)
  Ensuring DiceKriging does not break forest training (grf-labs#455)
  CI: Disable lintr (grf-labs#480)
  Platform-independent random numbers [using stdlib copy] (grf-labs#469)
  test_regression_forest.R: fix argument name (grf-labs#477)
  CI: Only warn on lint error (grf-labs#474)
  • Loading branch information
erikcs committed Aug 16, 2019
2 parents 9d3f85a + 584d856 commit b2c911f
Show file tree
Hide file tree
Showing 19 changed files with 1,544 additions and 100 deletions.
3 changes: 0 additions & 3 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@ r_binary_packages:
- devtools
- testthat
- roxygen2
r_github_packages:
- jimhester/lintr

matrix:
include:
Expand Down Expand Up @@ -64,7 +62,6 @@ matrix:
- valgrind --leak-check=full --error-exitcode=1 ./build/grf exclude:[characterization]

- name: "grf R package"
env: LINTR_COMMENT_BOT=false
before_install:
- cd r-package/grf
script:
Expand Down
3 changes: 1 addition & 2 deletions DEVELOPING.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,7 @@ The core forest implementation is written in C++, with an R interface powered by

## R package

To build the R package from source, cd into `r-package` and run `build_package.R`. Required development dependencies are listed there
(note: it is recommended to install the latest [lintr](https://github.com/jimhester/lintr) package with `devtools::install_github("jimhester/lintr")`). This mimics the tests run when submitting a pull request.
To build the R package from source, cd into `r-package` and run `build_package.R`. Required development dependencies are listed there. This mimics the tests run when submitting a pull request.

An alternative development workflow is to use the accompanying grf.Rproj and build and test the package with RStudio's build menu, which can be convenient
for quickly iterating C++/R code changes.
Expand Down
1 change: 0 additions & 1 deletion core/src/commons/utility.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
#include <sstream>
#include <unordered_set>
#include <unordered_map>
#include <random>

#include "utility.h"
#include "SparseData.h"
Expand Down
1 change: 0 additions & 1 deletion core/src/commons/utility.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
#include <iostream>
#include <fstream>
#include <algorithm>
#include <random>
#include <unordered_set>
#include <unordered_map>

Expand Down
3 changes: 2 additions & 1 deletion core/src/forest/ForestTrainer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@

#include "commons/utility.h"
#include "ForestTrainer.h"
#include "random/random.hpp"

ForestTrainer::ForestTrainer(std::shared_ptr<RelabelingStrategy> relabeling_strategy,
std::shared_ptr<SplittingRuleFactory> splitting_rule_factory,
Expand Down Expand Up @@ -87,7 +88,7 @@ std::vector<std::shared_ptr<Tree>> ForestTrainer::train_batch(
size_t ci_group_size = options.get_ci_group_size();

std::mt19937_64 random_number_generator(options.get_random_seed() + start);
std::uniform_int_distribution<uint> udist;
nonstd::uniform_int_distribution<uint> udist;
std::vector<std::shared_ptr<Tree>> trees;

if (ci_group_size == 1) {
Expand Down
17 changes: 9 additions & 8 deletions core/src/sampling/RandomSampler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

#include <algorithm>
#include <unordered_map>
#include <random>

#include "RandomSampler.h"

Expand Down Expand Up @@ -52,7 +53,7 @@ void RandomSampler::subsample(const std::vector<size_t>& samples,
double sample_fraction,
std::vector<size_t>& subsamples) {
std::vector<size_t> shuffled_sample(samples);
std::shuffle(shuffled_sample.begin(), shuffled_sample.end(), random_number_generator);
nonstd::shuffle(shuffled_sample.begin(), shuffled_sample.end(), random_number_generator);

uint subsample_size = (uint) std::ceil(samples.size() * sample_fraction);
subsamples.resize(subsample_size);
Expand All @@ -66,7 +67,7 @@ void RandomSampler::subsample(const std::vector<size_t>& samples,
std::vector<size_t>& subsamples,
std::vector<size_t>& oob_samples) {
std::vector<size_t> shuffled_sample(samples);
std::shuffle(shuffled_sample.begin(), shuffled_sample.end(), random_number_generator);
nonstd::shuffle(shuffled_sample.begin(), shuffled_sample.end(), random_number_generator);

size_t subsample_size = (size_t) std::ceil(samples.size() * sample_fraction);
subsamples.resize(subsample_size);
Expand All @@ -84,7 +85,7 @@ void RandomSampler::subsample_with_size(const std::vector<size_t>& samples,
size_t subsample_size,
std::vector<size_t>& subsamples) {
std::vector<size_t> shuffled_sample(samples);
std::shuffle(shuffled_sample.begin(), shuffled_sample.end(), random_number_generator);
nonstd::shuffle(shuffled_sample.begin(), shuffled_sample.end(), random_number_generator);

subsamples.resize(subsample_size);
std::copy(shuffled_sample.begin(),
Expand Down Expand Up @@ -133,7 +134,7 @@ void RandomSampler::shuffle_and_split(std::vector<size_t>& samples,

// Fill with 0..n_all-1 and shuffle
std::iota(samples.begin(), samples.end(), 0);
std::shuffle(samples.begin(), samples.end(), random_number_generator);
nonstd::shuffle(samples.begin(), samples.end(), random_number_generator);

samples.resize(size);
}
Expand All @@ -159,7 +160,7 @@ void RandomSampler::draw_simple(std::vector<size_t>& result,
std::vector<bool> temp;
temp.resize(max, false);

std::uniform_int_distribution<size_t> unif_dist(0, max - 1 - skip.size());
nonstd::uniform_int_distribution<size_t> unif_dist(0, max - 1 - skip.size());
for (size_t i = 0; i < num_samples; ++i) {
size_t draw;
do {
Expand Down Expand Up @@ -191,7 +192,7 @@ void RandomSampler::draw_fisher_yates(std::vector<size_t>& result,

// Draw without replacement using Fisher Yates algorithm
for (size_t i = result.size() - 1; i > 0; --i) {
std::uniform_int_distribution<size_t> distribution(0, i);
nonstd::uniform_int_distribution<size_t> distribution(0, i);
size_t j = distribution(random_number_generator);
std::swap(result[i], result[j]);
}
Expand All @@ -210,7 +211,7 @@ void RandomSampler::draw_weighted(std::vector<size_t>& result,
std::vector<bool> temp;
temp.resize(max + 1, false);

std::discrete_distribution<> weighted_dist(weights.begin(), weights.end());
nonstd::discrete_distribution<> weighted_dist(weights.begin(), weights.end());
for (size_t i = 0; i < num_samples; ++i) {
size_t draw;
do {
Expand All @@ -222,6 +223,6 @@ void RandomSampler::draw_weighted(std::vector<size_t>& result,
}

size_t RandomSampler::sample_poisson(size_t mean) {
std::poisson_distribution<size_t> distribution(mean);
nonstd::poisson_distribution<size_t> distribution(mean);
return distribution(random_number_generator);
}
2 changes: 2 additions & 0 deletions core/src/sampling/RandomSampler.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
#include "commons/globals.h"
#include "commons/utility.h"
#include "SamplingOptions.h"
#include "random/random.hpp"
#include "random/algorithm.hpp"

#include <cstddef>
#include <random>
Expand Down
1 change: 0 additions & 1 deletion core/src/tree/Tree.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
#define GRF_TREE_H_

#include <vector>
#include <random>
#include <iostream>

#include "commons/globals.h"
Expand Down
73 changes: 73 additions & 0 deletions core/third_party/random/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
# third_party/random

## What are these files?

They are copies of `random` and `algorithm` headers from the [llvm](https://github.com/llvm-mirror/libcxx/blob/master/include/) standard library.


## Motivation

Users complained about stability of random numbers across machines when setting seed across different platforms. See issue [#379](https://github.com/grf-labs/grf/issues/379).

As [pointed out](https://github.com/grf-labs/grf/issues/379#issuecomment-480641123) by @jtibshirani:

> the mersenne twister has the same implementation across platforms, the other random methods may differ from compiler to compiler

In PR [#469](https://github.com/grf-labs/grf/pull/469), @halflearned included this reduced copy of the relevant headers, ensuring random number generation is done in a consistent way across compilers.


## How to reproduce

#### File `random.hpp`

Extract only the relevant classes and functions from `random.hpp`:

+ `__independent_bits_engine`
+ `__clz`
+ `__log2`
+ `__log2_imp`
+ `__generate_canonical`
+ `uniform_int_distribution`
+ `poisson_distribution`
+ `uniform_real_distribution`
+ `normal_distribution`
+ `exponential_distribution`
+ `discrete_distribution`

From each class, remove all methods associated with `operator<<`.

Find and remove the following `_LIBCPP` macros:
+ `_LIBCPP_BEGIN_NAMESPACE_STD`
+ `_LIBCPP_CONSTEXPR`
+ `_LIBCPP_DISABLE_UBSAN_UNSIGNED_INTEGER_CHECK`
+ `_LIBCPP_END_NAMESPACE_STD`
+ `_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER`
+ `_LIBCPP_INLINE_VISIBILITY`
+ `_LIBCPP_MSVCRT`
+ `_LIBCPP_POP_MACROS`
+ `_LIBCPP_PUSH_MACROS`
+ `_LIBCPP_RANDOM`
+ `_LIBCPP_TEMPLATE_VIS`
+ `_LIBCPP_TYPE_VIS`
+ `_LIBCPP_USING_DEV_RANDOM`

Find and replace prefix:
+ `_VSTD::` -> `std::`

Add `namespace nonstd`.


#### File `algorithm.hpp`

Include modified `random.hpp`

Extract relevant class:

+ `shuffle`

Replace prefix:

+ `std::uniform_int_distribution` -> `nonstd::uniform_int_distribution`

Add `namespace nonstd`.
35 changes: 35 additions & 0 deletions core/third_party/random/algorithm.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#include <limits>
#include <cmath>
#include <algorithm>
#include <iterator>
#include "random/random.hpp"

#ifndef _GRFSTD_ALGORITHM
#define _GRFSTD_ALGORITHM

namespace nonstd {

template<class _RandomAccessIterator, class _UniformRandomNumberGenerator>
void shuffle(_RandomAccessIterator __first, _RandomAccessIterator __last,
#ifndef _LIBCPP_CXX03_LANG
_UniformRandomNumberGenerator &&__g)
#else
_UniformRandomNumberGenerator& __g)
#endif
{
typedef typename std::iterator_traits<_RandomAccessIterator>::difference_type difference_type;
typedef nonstd::uniform_int_distribution<ptrdiff_t> _Dp;
typedef typename _Dp::param_type _Pp;
difference_type __d = __last - __first;
if (__d > 1) {
_Dp __uid;
for (--__last, --__d; __first < __last; ++__first, --__d) {
difference_type __i = __uid(__g, _Pp(0, __d));
if (__i != difference_type(0))
std::swap(*__first, *(__first + __i));
}
}
}
}

#endif // _GRFSTD_ALGORITHM
Loading

0 comments on commit b2c911f

Please sign in to comment.