From 2538e5a64e9b3facf5a4230b4e40ec41d5402aa5 Mon Sep 17 00:00:00 2001 From: Erik Sverdrup Date: Tue, 20 Aug 2019 21:01:23 -0700 Subject: [PATCH 1/5] Add src/RcppExports.cpp to version control (#489) --- r-package/.gitignore | 3 - r-package/grf/src/RcppExports.cpp | 516 ++++++++++++++++++++++++++++++ 2 files changed, 516 insertions(+), 3 deletions(-) create mode 100644 r-package/grf/src/RcppExports.cpp diff --git a/r-package/.gitignore b/r-package/.gitignore index 38aadce35..f75731239 100644 --- a/r-package/.gitignore +++ b/r-package/.gitignore @@ -23,8 +23,5 @@ # clion files .idea/ -# Generated files -grf/src/RcppExports.cpp - # documentation by roxygen *.Rd diff --git a/r-package/grf/src/RcppExports.cpp b/r-package/grf/src/RcppExports.cpp new file mode 100644 index 000000000..49375fbae --- /dev/null +++ b/r-package/grf/src/RcppExports.cpp @@ -0,0 +1,516 @@ +// Generated by using Rcpp::compileAttributes() -> do not edit by hand +// Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 + +#include +#include + +using namespace Rcpp; + +// compute_split_frequencies +Rcpp::NumericMatrix compute_split_frequencies(Rcpp::List forest_object, size_t max_depth); +RcppExport SEXP _grf_compute_split_frequencies(SEXP forest_objectSEXP, SEXP max_depthSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< Rcpp::List >::type forest_object(forest_objectSEXP); + Rcpp::traits::input_parameter< size_t >::type max_depth(max_depthSEXP); + rcpp_result_gen = Rcpp::wrap(compute_split_frequencies(forest_object, max_depth)); + return rcpp_result_gen; +END_RCPP +} +// compute_weights +Eigen::SparseMatrix compute_weights(Rcpp::List forest_object, Rcpp::NumericMatrix train_matrix, Eigen::SparseMatrix sparse_train_matrix, Rcpp::NumericMatrix test_matrix, Eigen::SparseMatrix sparse_test_matrix, unsigned int num_threads); +RcppExport SEXP _grf_compute_weights(SEXP forest_objectSEXP, SEXP train_matrixSEXP, SEXP 
sparse_train_matrixSEXP, SEXP test_matrixSEXP, SEXP sparse_test_matrixSEXP, SEXP num_threadsSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< Rcpp::List >::type forest_object(forest_objectSEXP); + Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type train_matrix(train_matrixSEXP); + Rcpp::traits::input_parameter< Eigen::SparseMatrix >::type sparse_train_matrix(sparse_train_matrixSEXP); + Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type test_matrix(test_matrixSEXP); + Rcpp::traits::input_parameter< Eigen::SparseMatrix >::type sparse_test_matrix(sparse_test_matrixSEXP); + Rcpp::traits::input_parameter< unsigned int >::type num_threads(num_threadsSEXP); + rcpp_result_gen = Rcpp::wrap(compute_weights(forest_object, train_matrix, sparse_train_matrix, test_matrix, sparse_test_matrix, num_threads)); + return rcpp_result_gen; +END_RCPP +} +// compute_weights_oob +Eigen::SparseMatrix compute_weights_oob(Rcpp::List forest_object, Rcpp::NumericMatrix test_matrix, Eigen::SparseMatrix sparse_test_matrix, unsigned int num_threads); +RcppExport SEXP _grf_compute_weights_oob(SEXP forest_objectSEXP, SEXP test_matrixSEXP, SEXP sparse_test_matrixSEXP, SEXP num_threadsSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< Rcpp::List >::type forest_object(forest_objectSEXP); + Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type test_matrix(test_matrixSEXP); + Rcpp::traits::input_parameter< Eigen::SparseMatrix >::type sparse_test_matrix(sparse_test_matrixSEXP); + Rcpp::traits::input_parameter< unsigned int >::type num_threads(num_threadsSEXP); + rcpp_result_gen = Rcpp::wrap(compute_weights_oob(forest_object, test_matrix, sparse_test_matrix, num_threads)); + return rcpp_result_gen; +END_RCPP +} +// deserialize_tree +Rcpp::List deserialize_tree(Rcpp::List forest_object, size_t tree_index); +RcppExport SEXP 
_grf_deserialize_tree(SEXP forest_objectSEXP, SEXP tree_indexSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< Rcpp::List >::type forest_object(forest_objectSEXP); + Rcpp::traits::input_parameter< size_t >::type tree_index(tree_indexSEXP); + rcpp_result_gen = Rcpp::wrap(deserialize_tree(forest_object, tree_index)); + return rcpp_result_gen; +END_RCPP +} +// merge +Rcpp::List merge(const Rcpp::List forest_objects); +RcppExport SEXP _grf_merge(SEXP forest_objectsSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const Rcpp::List >::type forest_objects(forest_objectsSEXP); + rcpp_result_gen = Rcpp::wrap(merge(forest_objects)); + return rcpp_result_gen; +END_RCPP +} +// causal_train +Rcpp::List causal_train(Rcpp::NumericMatrix train_matrix, Eigen::SparseMatrix sparse_train_matrix, size_t outcome_index, size_t treatment_index, size_t sample_weight_index, bool use_sample_weights, unsigned int mtry, unsigned int num_trees, unsigned int min_node_size, double sample_fraction, bool honesty, double honesty_fraction, bool prune_empty_leaves, size_t ci_group_size, double reduced_form_weight, double alpha, double imbalance_penalty, bool stabilize_splits, std::vector clusters, unsigned int samples_per_cluster, bool compute_oob_predictions, unsigned int num_threads, unsigned int seed); +RcppExport SEXP _grf_causal_train(SEXP train_matrixSEXP, SEXP sparse_train_matrixSEXP, SEXP outcome_indexSEXP, SEXP treatment_indexSEXP, SEXP sample_weight_indexSEXP, SEXP use_sample_weightsSEXP, SEXP mtrySEXP, SEXP num_treesSEXP, SEXP min_node_sizeSEXP, SEXP sample_fractionSEXP, SEXP honestySEXP, SEXP honesty_fractionSEXP, SEXP prune_empty_leavesSEXP, SEXP ci_group_sizeSEXP, SEXP reduced_form_weightSEXP, SEXP alphaSEXP, SEXP imbalance_penaltySEXP, SEXP stabilize_splitsSEXP, SEXP clustersSEXP, SEXP samples_per_clusterSEXP, SEXP compute_oob_predictionsSEXP, 
SEXP num_threadsSEXP, SEXP seedSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type train_matrix(train_matrixSEXP); + Rcpp::traits::input_parameter< Eigen::SparseMatrix >::type sparse_train_matrix(sparse_train_matrixSEXP); + Rcpp::traits::input_parameter< size_t >::type outcome_index(outcome_indexSEXP); + Rcpp::traits::input_parameter< size_t >::type treatment_index(treatment_indexSEXP); + Rcpp::traits::input_parameter< size_t >::type sample_weight_index(sample_weight_indexSEXP); + Rcpp::traits::input_parameter< bool >::type use_sample_weights(use_sample_weightsSEXP); + Rcpp::traits::input_parameter< unsigned int >::type mtry(mtrySEXP); + Rcpp::traits::input_parameter< unsigned int >::type num_trees(num_treesSEXP); + Rcpp::traits::input_parameter< unsigned int >::type min_node_size(min_node_sizeSEXP); + Rcpp::traits::input_parameter< double >::type sample_fraction(sample_fractionSEXP); + Rcpp::traits::input_parameter< bool >::type honesty(honestySEXP); + Rcpp::traits::input_parameter< double >::type honesty_fraction(honesty_fractionSEXP); + Rcpp::traits::input_parameter< bool >::type prune_empty_leaves(prune_empty_leavesSEXP); + Rcpp::traits::input_parameter< size_t >::type ci_group_size(ci_group_sizeSEXP); + Rcpp::traits::input_parameter< double >::type reduced_form_weight(reduced_form_weightSEXP); + Rcpp::traits::input_parameter< double >::type alpha(alphaSEXP); + Rcpp::traits::input_parameter< double >::type imbalance_penalty(imbalance_penaltySEXP); + Rcpp::traits::input_parameter< bool >::type stabilize_splits(stabilize_splitsSEXP); + Rcpp::traits::input_parameter< std::vector >::type clusters(clustersSEXP); + Rcpp::traits::input_parameter< unsigned int >::type samples_per_cluster(samples_per_clusterSEXP); + Rcpp::traits::input_parameter< bool >::type compute_oob_predictions(compute_oob_predictionsSEXP); + Rcpp::traits::input_parameter< unsigned int >::type 
num_threads(num_threadsSEXP); + Rcpp::traits::input_parameter< unsigned int >::type seed(seedSEXP); + rcpp_result_gen = Rcpp::wrap(causal_train(train_matrix, sparse_train_matrix, outcome_index, treatment_index, sample_weight_index, use_sample_weights, mtry, num_trees, min_node_size, sample_fraction, honesty, honesty_fraction, prune_empty_leaves, ci_group_size, reduced_form_weight, alpha, imbalance_penalty, stabilize_splits, clusters, samples_per_cluster, compute_oob_predictions, num_threads, seed)); + return rcpp_result_gen; +END_RCPP +} +// causal_predict +Rcpp::List causal_predict(Rcpp::List forest_object, Rcpp::NumericMatrix train_matrix, Eigen::SparseMatrix sparse_train_matrix, size_t outcome_index, size_t treatment_index, Rcpp::NumericMatrix test_matrix, Eigen::SparseMatrix sparse_test_matrix, unsigned int num_threads, bool estimate_variance); +RcppExport SEXP _grf_causal_predict(SEXP forest_objectSEXP, SEXP train_matrixSEXP, SEXP sparse_train_matrixSEXP, SEXP outcome_indexSEXP, SEXP treatment_indexSEXP, SEXP test_matrixSEXP, SEXP sparse_test_matrixSEXP, SEXP num_threadsSEXP, SEXP estimate_varianceSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< Rcpp::List >::type forest_object(forest_objectSEXP); + Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type train_matrix(train_matrixSEXP); + Rcpp::traits::input_parameter< Eigen::SparseMatrix >::type sparse_train_matrix(sparse_train_matrixSEXP); + Rcpp::traits::input_parameter< size_t >::type outcome_index(outcome_indexSEXP); + Rcpp::traits::input_parameter< size_t >::type treatment_index(treatment_indexSEXP); + Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type test_matrix(test_matrixSEXP); + Rcpp::traits::input_parameter< Eigen::SparseMatrix >::type sparse_test_matrix(sparse_test_matrixSEXP); + Rcpp::traits::input_parameter< unsigned int >::type num_threads(num_threadsSEXP); + Rcpp::traits::input_parameter< bool >::type 
estimate_variance(estimate_varianceSEXP); + rcpp_result_gen = Rcpp::wrap(causal_predict(forest_object, train_matrix, sparse_train_matrix, outcome_index, treatment_index, test_matrix, sparse_test_matrix, num_threads, estimate_variance)); + return rcpp_result_gen; +END_RCPP +} +// causal_predict_oob +Rcpp::List causal_predict_oob(Rcpp::List forest_object, Rcpp::NumericMatrix train_matrix, Eigen::SparseMatrix sparse_train_matrix, size_t outcome_index, size_t treatment_index, unsigned int num_threads, bool estimate_variance); +RcppExport SEXP _grf_causal_predict_oob(SEXP forest_objectSEXP, SEXP train_matrixSEXP, SEXP sparse_train_matrixSEXP, SEXP outcome_indexSEXP, SEXP treatment_indexSEXP, SEXP num_threadsSEXP, SEXP estimate_varianceSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< Rcpp::List >::type forest_object(forest_objectSEXP); + Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type train_matrix(train_matrixSEXP); + Rcpp::traits::input_parameter< Eigen::SparseMatrix >::type sparse_train_matrix(sparse_train_matrixSEXP); + Rcpp::traits::input_parameter< size_t >::type outcome_index(outcome_indexSEXP); + Rcpp::traits::input_parameter< size_t >::type treatment_index(treatment_indexSEXP); + Rcpp::traits::input_parameter< unsigned int >::type num_threads(num_threadsSEXP); + Rcpp::traits::input_parameter< bool >::type estimate_variance(estimate_varianceSEXP); + rcpp_result_gen = Rcpp::wrap(causal_predict_oob(forest_object, train_matrix, sparse_train_matrix, outcome_index, treatment_index, num_threads, estimate_variance)); + return rcpp_result_gen; +END_RCPP +} +// ll_causal_predict +Rcpp::List ll_causal_predict(Rcpp::List forest, Rcpp::NumericMatrix input_data, Rcpp::NumericMatrix training_data, Eigen::SparseMatrix sparse_input_data, Eigen::SparseMatrix sparse_training_data, size_t outcome_index, size_t treatment_index, std::vector lambdas, bool use_weighted_penalty, std::vector 
linear_correction_variables, unsigned int num_threads, bool estimate_variance); +RcppExport SEXP _grf_ll_causal_predict(SEXP forestSEXP, SEXP input_dataSEXP, SEXP training_dataSEXP, SEXP sparse_input_dataSEXP, SEXP sparse_training_dataSEXP, SEXP outcome_indexSEXP, SEXP treatment_indexSEXP, SEXP lambdasSEXP, SEXP use_weighted_penaltySEXP, SEXP linear_correction_variablesSEXP, SEXP num_threadsSEXP, SEXP estimate_varianceSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< Rcpp::List >::type forest(forestSEXP); + Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type input_data(input_dataSEXP); + Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type training_data(training_dataSEXP); + Rcpp::traits::input_parameter< Eigen::SparseMatrix >::type sparse_input_data(sparse_input_dataSEXP); + Rcpp::traits::input_parameter< Eigen::SparseMatrix >::type sparse_training_data(sparse_training_dataSEXP); + Rcpp::traits::input_parameter< size_t >::type outcome_index(outcome_indexSEXP); + Rcpp::traits::input_parameter< size_t >::type treatment_index(treatment_indexSEXP); + Rcpp::traits::input_parameter< std::vector >::type lambdas(lambdasSEXP); + Rcpp::traits::input_parameter< bool >::type use_weighted_penalty(use_weighted_penaltySEXP); + Rcpp::traits::input_parameter< std::vector >::type linear_correction_variables(linear_correction_variablesSEXP); + Rcpp::traits::input_parameter< unsigned int >::type num_threads(num_threadsSEXP); + Rcpp::traits::input_parameter< bool >::type estimate_variance(estimate_varianceSEXP); + rcpp_result_gen = Rcpp::wrap(ll_causal_predict(forest, input_data, training_data, sparse_input_data, sparse_training_data, outcome_index, treatment_index, lambdas, use_weighted_penalty, linear_correction_variables, num_threads, estimate_variance)); + return rcpp_result_gen; +END_RCPP +} +// ll_causal_predict_oob +Rcpp::List ll_causal_predict_oob(Rcpp::List forest, Rcpp::NumericMatrix 
input_data, Eigen::SparseMatrix sparse_input_data, size_t outcome_index, size_t treatment_index, std::vector lambdas, bool use_weighted_penalty, std::vector linear_correction_variables, unsigned int num_threads, bool estimate_variance); +RcppExport SEXP _grf_ll_causal_predict_oob(SEXP forestSEXP, SEXP input_dataSEXP, SEXP sparse_input_dataSEXP, SEXP outcome_indexSEXP, SEXP treatment_indexSEXP, SEXP lambdasSEXP, SEXP use_weighted_penaltySEXP, SEXP linear_correction_variablesSEXP, SEXP num_threadsSEXP, SEXP estimate_varianceSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< Rcpp::List >::type forest(forestSEXP); + Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type input_data(input_dataSEXP); + Rcpp::traits::input_parameter< Eigen::SparseMatrix >::type sparse_input_data(sparse_input_dataSEXP); + Rcpp::traits::input_parameter< size_t >::type outcome_index(outcome_indexSEXP); + Rcpp::traits::input_parameter< size_t >::type treatment_index(treatment_indexSEXP); + Rcpp::traits::input_parameter< std::vector >::type lambdas(lambdasSEXP); + Rcpp::traits::input_parameter< bool >::type use_weighted_penalty(use_weighted_penaltySEXP); + Rcpp::traits::input_parameter< std::vector >::type linear_correction_variables(linear_correction_variablesSEXP); + Rcpp::traits::input_parameter< unsigned int >::type num_threads(num_threadsSEXP); + Rcpp::traits::input_parameter< bool >::type estimate_variance(estimate_varianceSEXP); + rcpp_result_gen = Rcpp::wrap(ll_causal_predict_oob(forest, input_data, sparse_input_data, outcome_index, treatment_index, lambdas, use_weighted_penalty, linear_correction_variables, num_threads, estimate_variance)); + return rcpp_result_gen; +END_RCPP +} +// custom_train +Rcpp::List custom_train(Rcpp::NumericMatrix train_matrix, Eigen::SparseMatrix sparse_train_matrix, size_t outcome_index, unsigned int mtry, unsigned int num_trees, unsigned int min_node_size, double sample_fraction, 
bool honesty, double honesty_fraction, bool prune_empty_leaves, size_t ci_group_size, double alpha, double imbalance_penalty, std::vector clusters, unsigned int samples_per_cluster, bool compute_oob_predictions, unsigned int num_threads, unsigned int seed); +RcppExport SEXP _grf_custom_train(SEXP train_matrixSEXP, SEXP sparse_train_matrixSEXP, SEXP outcome_indexSEXP, SEXP mtrySEXP, SEXP num_treesSEXP, SEXP min_node_sizeSEXP, SEXP sample_fractionSEXP, SEXP honestySEXP, SEXP honesty_fractionSEXP, SEXP prune_empty_leavesSEXP, SEXP ci_group_sizeSEXP, SEXP alphaSEXP, SEXP imbalance_penaltySEXP, SEXP clustersSEXP, SEXP samples_per_clusterSEXP, SEXP compute_oob_predictionsSEXP, SEXP num_threadsSEXP, SEXP seedSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type train_matrix(train_matrixSEXP); + Rcpp::traits::input_parameter< Eigen::SparseMatrix >::type sparse_train_matrix(sparse_train_matrixSEXP); + Rcpp::traits::input_parameter< size_t >::type outcome_index(outcome_indexSEXP); + Rcpp::traits::input_parameter< unsigned int >::type mtry(mtrySEXP); + Rcpp::traits::input_parameter< unsigned int >::type num_trees(num_treesSEXP); + Rcpp::traits::input_parameter< unsigned int >::type min_node_size(min_node_sizeSEXP); + Rcpp::traits::input_parameter< double >::type sample_fraction(sample_fractionSEXP); + Rcpp::traits::input_parameter< bool >::type honesty(honestySEXP); + Rcpp::traits::input_parameter< double >::type honesty_fraction(honesty_fractionSEXP); + Rcpp::traits::input_parameter< bool >::type prune_empty_leaves(prune_empty_leavesSEXP); + Rcpp::traits::input_parameter< size_t >::type ci_group_size(ci_group_sizeSEXP); + Rcpp::traits::input_parameter< double >::type alpha(alphaSEXP); + Rcpp::traits::input_parameter< double >::type imbalance_penalty(imbalance_penaltySEXP); + Rcpp::traits::input_parameter< std::vector >::type clusters(clustersSEXP); + Rcpp::traits::input_parameter< 
unsigned int >::type samples_per_cluster(samples_per_clusterSEXP); + Rcpp::traits::input_parameter< bool >::type compute_oob_predictions(compute_oob_predictionsSEXP); + Rcpp::traits::input_parameter< unsigned int >::type num_threads(num_threadsSEXP); + Rcpp::traits::input_parameter< unsigned int >::type seed(seedSEXP); + rcpp_result_gen = Rcpp::wrap(custom_train(train_matrix, sparse_train_matrix, outcome_index, mtry, num_trees, min_node_size, sample_fraction, honesty, honesty_fraction, prune_empty_leaves, ci_group_size, alpha, imbalance_penalty, clusters, samples_per_cluster, compute_oob_predictions, num_threads, seed)); + return rcpp_result_gen; +END_RCPP +} +// custom_predict +Rcpp::NumericMatrix custom_predict(Rcpp::List forest_object, Rcpp::NumericMatrix train_matrix, Eigen::SparseMatrix sparse_train_matrix, size_t outcome_index, Rcpp::NumericMatrix test_matrix, Eigen::SparseMatrix sparse_test_matrix, unsigned int num_threads); +RcppExport SEXP _grf_custom_predict(SEXP forest_objectSEXP, SEXP train_matrixSEXP, SEXP sparse_train_matrixSEXP, SEXP outcome_indexSEXP, SEXP test_matrixSEXP, SEXP sparse_test_matrixSEXP, SEXP num_threadsSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< Rcpp::List >::type forest_object(forest_objectSEXP); + Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type train_matrix(train_matrixSEXP); + Rcpp::traits::input_parameter< Eigen::SparseMatrix >::type sparse_train_matrix(sparse_train_matrixSEXP); + Rcpp::traits::input_parameter< size_t >::type outcome_index(outcome_indexSEXP); + Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type test_matrix(test_matrixSEXP); + Rcpp::traits::input_parameter< Eigen::SparseMatrix >::type sparse_test_matrix(sparse_test_matrixSEXP); + Rcpp::traits::input_parameter< unsigned int >::type num_threads(num_threadsSEXP); + rcpp_result_gen = Rcpp::wrap(custom_predict(forest_object, train_matrix, sparse_train_matrix, outcome_index, 
test_matrix, sparse_test_matrix, num_threads)); + return rcpp_result_gen; +END_RCPP +} +// custom_predict_oob +Rcpp::NumericMatrix custom_predict_oob(Rcpp::List forest_object, Rcpp::NumericMatrix train_matrix, Eigen::SparseMatrix sparse_train_matrix, size_t outcome_index, unsigned int num_threads); +RcppExport SEXP _grf_custom_predict_oob(SEXP forest_objectSEXP, SEXP train_matrixSEXP, SEXP sparse_train_matrixSEXP, SEXP outcome_indexSEXP, SEXP num_threadsSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< Rcpp::List >::type forest_object(forest_objectSEXP); + Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type train_matrix(train_matrixSEXP); + Rcpp::traits::input_parameter< Eigen::SparseMatrix >::type sparse_train_matrix(sparse_train_matrixSEXP); + Rcpp::traits::input_parameter< size_t >::type outcome_index(outcome_indexSEXP); + Rcpp::traits::input_parameter< unsigned int >::type num_threads(num_threadsSEXP); + rcpp_result_gen = Rcpp::wrap(custom_predict_oob(forest_object, train_matrix, sparse_train_matrix, outcome_index, num_threads)); + return rcpp_result_gen; +END_RCPP +} +// instrumental_train +Rcpp::List instrumental_train(Rcpp::NumericMatrix train_matrix, Eigen::SparseMatrix sparse_train_matrix, size_t outcome_index, size_t treatment_index, size_t instrument_index, size_t sample_weight_index, bool use_sample_weights, unsigned int mtry, unsigned int num_trees, unsigned int min_node_size, double sample_fraction, bool honesty, double honesty_fraction, bool prune_empty_leaves, size_t ci_group_size, double reduced_form_weight, double alpha, double imbalance_penalty, bool stabilize_splits, std::vector clusters, unsigned int samples_per_cluster, bool compute_oob_predictions, unsigned int num_threads, unsigned int seed); +RcppExport SEXP _grf_instrumental_train(SEXP train_matrixSEXP, SEXP sparse_train_matrixSEXP, SEXP outcome_indexSEXP, SEXP treatment_indexSEXP, SEXP instrument_indexSEXP, 
SEXP sample_weight_indexSEXP, SEXP use_sample_weightsSEXP, SEXP mtrySEXP, SEXP num_treesSEXP, SEXP min_node_sizeSEXP, SEXP sample_fractionSEXP, SEXP honestySEXP, SEXP honesty_fractionSEXP, SEXP prune_empty_leavesSEXP, SEXP ci_group_sizeSEXP, SEXP reduced_form_weightSEXP, SEXP alphaSEXP, SEXP imbalance_penaltySEXP, SEXP stabilize_splitsSEXP, SEXP clustersSEXP, SEXP samples_per_clusterSEXP, SEXP compute_oob_predictionsSEXP, SEXP num_threadsSEXP, SEXP seedSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type train_matrix(train_matrixSEXP); + Rcpp::traits::input_parameter< Eigen::SparseMatrix >::type sparse_train_matrix(sparse_train_matrixSEXP); + Rcpp::traits::input_parameter< size_t >::type outcome_index(outcome_indexSEXP); + Rcpp::traits::input_parameter< size_t >::type treatment_index(treatment_indexSEXP); + Rcpp::traits::input_parameter< size_t >::type instrument_index(instrument_indexSEXP); + Rcpp::traits::input_parameter< size_t >::type sample_weight_index(sample_weight_indexSEXP); + Rcpp::traits::input_parameter< bool >::type use_sample_weights(use_sample_weightsSEXP); + Rcpp::traits::input_parameter< unsigned int >::type mtry(mtrySEXP); + Rcpp::traits::input_parameter< unsigned int >::type num_trees(num_treesSEXP); + Rcpp::traits::input_parameter< unsigned int >::type min_node_size(min_node_sizeSEXP); + Rcpp::traits::input_parameter< double >::type sample_fraction(sample_fractionSEXP); + Rcpp::traits::input_parameter< bool >::type honesty(honestySEXP); + Rcpp::traits::input_parameter< double >::type honesty_fraction(honesty_fractionSEXP); + Rcpp::traits::input_parameter< bool >::type prune_empty_leaves(prune_empty_leavesSEXP); + Rcpp::traits::input_parameter< size_t >::type ci_group_size(ci_group_sizeSEXP); + Rcpp::traits::input_parameter< double >::type reduced_form_weight(reduced_form_weightSEXP); + Rcpp::traits::input_parameter< double >::type alpha(alphaSEXP); + 
Rcpp::traits::input_parameter< double >::type imbalance_penalty(imbalance_penaltySEXP); + Rcpp::traits::input_parameter< bool >::type stabilize_splits(stabilize_splitsSEXP); + Rcpp::traits::input_parameter< std::vector >::type clusters(clustersSEXP); + Rcpp::traits::input_parameter< unsigned int >::type samples_per_cluster(samples_per_clusterSEXP); + Rcpp::traits::input_parameter< bool >::type compute_oob_predictions(compute_oob_predictionsSEXP); + Rcpp::traits::input_parameter< unsigned int >::type num_threads(num_threadsSEXP); + Rcpp::traits::input_parameter< unsigned int >::type seed(seedSEXP); + rcpp_result_gen = Rcpp::wrap(instrumental_train(train_matrix, sparse_train_matrix, outcome_index, treatment_index, instrument_index, sample_weight_index, use_sample_weights, mtry, num_trees, min_node_size, sample_fraction, honesty, honesty_fraction, prune_empty_leaves, ci_group_size, reduced_form_weight, alpha, imbalance_penalty, stabilize_splits, clusters, samples_per_cluster, compute_oob_predictions, num_threads, seed)); + return rcpp_result_gen; +END_RCPP +} +// instrumental_predict +Rcpp::List instrumental_predict(Rcpp::List forest_object, Rcpp::NumericMatrix train_matrix, Eigen::SparseMatrix sparse_train_matrix, size_t outcome_index, size_t treatment_index, size_t instrument_index, Rcpp::NumericMatrix test_matrix, Eigen::SparseMatrix sparse_test_matrix, unsigned int num_threads, bool estimate_variance); +RcppExport SEXP _grf_instrumental_predict(SEXP forest_objectSEXP, SEXP train_matrixSEXP, SEXP sparse_train_matrixSEXP, SEXP outcome_indexSEXP, SEXP treatment_indexSEXP, SEXP instrument_indexSEXP, SEXP test_matrixSEXP, SEXP sparse_test_matrixSEXP, SEXP num_threadsSEXP, SEXP estimate_varianceSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< Rcpp::List >::type forest_object(forest_objectSEXP); + Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type train_matrix(train_matrixSEXP); + 
Rcpp::traits::input_parameter< Eigen::SparseMatrix >::type sparse_train_matrix(sparse_train_matrixSEXP); + Rcpp::traits::input_parameter< size_t >::type outcome_index(outcome_indexSEXP); + Rcpp::traits::input_parameter< size_t >::type treatment_index(treatment_indexSEXP); + Rcpp::traits::input_parameter< size_t >::type instrument_index(instrument_indexSEXP); + Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type test_matrix(test_matrixSEXP); + Rcpp::traits::input_parameter< Eigen::SparseMatrix >::type sparse_test_matrix(sparse_test_matrixSEXP); + Rcpp::traits::input_parameter< unsigned int >::type num_threads(num_threadsSEXP); + Rcpp::traits::input_parameter< bool >::type estimate_variance(estimate_varianceSEXP); + rcpp_result_gen = Rcpp::wrap(instrumental_predict(forest_object, train_matrix, sparse_train_matrix, outcome_index, treatment_index, instrument_index, test_matrix, sparse_test_matrix, num_threads, estimate_variance)); + return rcpp_result_gen; +END_RCPP +} +// instrumental_predict_oob +Rcpp::List instrumental_predict_oob(Rcpp::List forest_object, Rcpp::NumericMatrix train_matrix, Eigen::SparseMatrix sparse_train_matrix, size_t outcome_index, size_t treatment_index, size_t instrument_index, unsigned int num_threads, bool estimate_variance); +RcppExport SEXP _grf_instrumental_predict_oob(SEXP forest_objectSEXP, SEXP train_matrixSEXP, SEXP sparse_train_matrixSEXP, SEXP outcome_indexSEXP, SEXP treatment_indexSEXP, SEXP instrument_indexSEXP, SEXP num_threadsSEXP, SEXP estimate_varianceSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< Rcpp::List >::type forest_object(forest_objectSEXP); + Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type train_matrix(train_matrixSEXP); + Rcpp::traits::input_parameter< Eigen::SparseMatrix >::type sparse_train_matrix(sparse_train_matrixSEXP); + Rcpp::traits::input_parameter< size_t >::type outcome_index(outcome_indexSEXP); + 
Rcpp::traits::input_parameter< size_t >::type treatment_index(treatment_indexSEXP); + Rcpp::traits::input_parameter< size_t >::type instrument_index(instrument_indexSEXP); + Rcpp::traits::input_parameter< unsigned int >::type num_threads(num_threadsSEXP); + Rcpp::traits::input_parameter< bool >::type estimate_variance(estimate_varianceSEXP); + rcpp_result_gen = Rcpp::wrap(instrumental_predict_oob(forest_object, train_matrix, sparse_train_matrix, outcome_index, treatment_index, instrument_index, num_threads, estimate_variance)); + return rcpp_result_gen; +END_RCPP +} +// quantile_train +Rcpp::List quantile_train(std::vector quantiles, bool regression_splits, Rcpp::NumericMatrix train_matrix, Eigen::SparseMatrix sparse_train_matrix, size_t outcome_index, unsigned int mtry, unsigned int num_trees, int min_node_size, double sample_fraction, bool honesty, double honesty_fraction, bool prune_empty_leaves, size_t ci_group_size, double alpha, double imbalance_penalty, std::vector clusters, unsigned int samples_per_cluster, int num_threads, unsigned int seed); +RcppExport SEXP _grf_quantile_train(SEXP quantilesSEXP, SEXP regression_splitsSEXP, SEXP train_matrixSEXP, SEXP sparse_train_matrixSEXP, SEXP outcome_indexSEXP, SEXP mtrySEXP, SEXP num_treesSEXP, SEXP min_node_sizeSEXP, SEXP sample_fractionSEXP, SEXP honestySEXP, SEXP honesty_fractionSEXP, SEXP prune_empty_leavesSEXP, SEXP ci_group_sizeSEXP, SEXP alphaSEXP, SEXP imbalance_penaltySEXP, SEXP clustersSEXP, SEXP samples_per_clusterSEXP, SEXP num_threadsSEXP, SEXP seedSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< std::vector >::type quantiles(quantilesSEXP); + Rcpp::traits::input_parameter< bool >::type regression_splits(regression_splitsSEXP); + Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type train_matrix(train_matrixSEXP); + Rcpp::traits::input_parameter< Eigen::SparseMatrix >::type sparse_train_matrix(sparse_train_matrixSEXP); + 
Rcpp::traits::input_parameter< size_t >::type outcome_index(outcome_indexSEXP); + Rcpp::traits::input_parameter< unsigned int >::type mtry(mtrySEXP); + Rcpp::traits::input_parameter< unsigned int >::type num_trees(num_treesSEXP); + Rcpp::traits::input_parameter< int >::type min_node_size(min_node_sizeSEXP); + Rcpp::traits::input_parameter< double >::type sample_fraction(sample_fractionSEXP); + Rcpp::traits::input_parameter< bool >::type honesty(honestySEXP); + Rcpp::traits::input_parameter< double >::type honesty_fraction(honesty_fractionSEXP); + Rcpp::traits::input_parameter< bool >::type prune_empty_leaves(prune_empty_leavesSEXP); + Rcpp::traits::input_parameter< size_t >::type ci_group_size(ci_group_sizeSEXP); + Rcpp::traits::input_parameter< double >::type alpha(alphaSEXP); + Rcpp::traits::input_parameter< double >::type imbalance_penalty(imbalance_penaltySEXP); + Rcpp::traits::input_parameter< std::vector >::type clusters(clustersSEXP); + Rcpp::traits::input_parameter< unsigned int >::type samples_per_cluster(samples_per_clusterSEXP); + Rcpp::traits::input_parameter< int >::type num_threads(num_threadsSEXP); + Rcpp::traits::input_parameter< unsigned int >::type seed(seedSEXP); + rcpp_result_gen = Rcpp::wrap(quantile_train(quantiles, regression_splits, train_matrix, sparse_train_matrix, outcome_index, mtry, num_trees, min_node_size, sample_fraction, honesty, honesty_fraction, prune_empty_leaves, ci_group_size, alpha, imbalance_penalty, clusters, samples_per_cluster, num_threads, seed)); + return rcpp_result_gen; +END_RCPP +} +// quantile_predict +Rcpp::NumericMatrix quantile_predict(Rcpp::List forest_object, std::vector quantiles, Rcpp::NumericMatrix train_matrix, Eigen::SparseMatrix sparse_train_matrix, size_t outcome_index, Rcpp::NumericMatrix test_matrix, Eigen::SparseMatrix sparse_test_matrix, unsigned int num_threads); +RcppExport SEXP _grf_quantile_predict(SEXP forest_objectSEXP, SEXP quantilesSEXP, SEXP train_matrixSEXP, SEXP sparse_train_matrixSEXP, 
SEXP outcome_indexSEXP, SEXP test_matrixSEXP, SEXP sparse_test_matrixSEXP, SEXP num_threadsSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< Rcpp::List >::type forest_object(forest_objectSEXP); + Rcpp::traits::input_parameter< std::vector >::type quantiles(quantilesSEXP); + Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type train_matrix(train_matrixSEXP); + Rcpp::traits::input_parameter< Eigen::SparseMatrix >::type sparse_train_matrix(sparse_train_matrixSEXP); + Rcpp::traits::input_parameter< size_t >::type outcome_index(outcome_indexSEXP); + Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type test_matrix(test_matrixSEXP); + Rcpp::traits::input_parameter< Eigen::SparseMatrix >::type sparse_test_matrix(sparse_test_matrixSEXP); + Rcpp::traits::input_parameter< unsigned int >::type num_threads(num_threadsSEXP); + rcpp_result_gen = Rcpp::wrap(quantile_predict(forest_object, quantiles, train_matrix, sparse_train_matrix, outcome_index, test_matrix, sparse_test_matrix, num_threads)); + return rcpp_result_gen; +END_RCPP +} +// quantile_predict_oob +Rcpp::NumericMatrix quantile_predict_oob(Rcpp::List forest_object, std::vector quantiles, Rcpp::NumericMatrix train_matrix, Eigen::SparseMatrix sparse_train_matrix, size_t outcome_index, unsigned int num_threads); +RcppExport SEXP _grf_quantile_predict_oob(SEXP forest_objectSEXP, SEXP quantilesSEXP, SEXP train_matrixSEXP, SEXP sparse_train_matrixSEXP, SEXP outcome_indexSEXP, SEXP num_threadsSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< Rcpp::List >::type forest_object(forest_objectSEXP); + Rcpp::traits::input_parameter< std::vector >::type quantiles(quantilesSEXP); + Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type train_matrix(train_matrixSEXP); + Rcpp::traits::input_parameter< Eigen::SparseMatrix >::type sparse_train_matrix(sparse_train_matrixSEXP); + 
Rcpp::traits::input_parameter< size_t >::type outcome_index(outcome_indexSEXP); + Rcpp::traits::input_parameter< unsigned int >::type num_threads(num_threadsSEXP); + rcpp_result_gen = Rcpp::wrap(quantile_predict_oob(forest_object, quantiles, train_matrix, sparse_train_matrix, outcome_index, num_threads)); + return rcpp_result_gen; +END_RCPP +} +// regression_train +Rcpp::List regression_train(Rcpp::NumericMatrix train_matrix, Eigen::SparseMatrix sparse_train_matrix, size_t outcome_index, size_t sample_weight_index, bool use_sample_weights, unsigned int mtry, unsigned int num_trees, unsigned int min_node_size, double sample_fraction, bool honesty, double honesty_fraction, bool prune_empty_leaves, size_t ci_group_size, double alpha, double imbalance_penalty, std::vector clusters, unsigned int samples_per_cluster, bool compute_oob_predictions, unsigned int num_threads, unsigned int seed); +RcppExport SEXP _grf_regression_train(SEXP train_matrixSEXP, SEXP sparse_train_matrixSEXP, SEXP outcome_indexSEXP, SEXP sample_weight_indexSEXP, SEXP use_sample_weightsSEXP, SEXP mtrySEXP, SEXP num_treesSEXP, SEXP min_node_sizeSEXP, SEXP sample_fractionSEXP, SEXP honestySEXP, SEXP honesty_fractionSEXP, SEXP prune_empty_leavesSEXP, SEXP ci_group_sizeSEXP, SEXP alphaSEXP, SEXP imbalance_penaltySEXP, SEXP clustersSEXP, SEXP samples_per_clusterSEXP, SEXP compute_oob_predictionsSEXP, SEXP num_threadsSEXP, SEXP seedSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type train_matrix(train_matrixSEXP); + Rcpp::traits::input_parameter< Eigen::SparseMatrix >::type sparse_train_matrix(sparse_train_matrixSEXP); + Rcpp::traits::input_parameter< size_t >::type outcome_index(outcome_indexSEXP); + Rcpp::traits::input_parameter< size_t >::type sample_weight_index(sample_weight_indexSEXP); + Rcpp::traits::input_parameter< bool >::type use_sample_weights(use_sample_weightsSEXP); + 
Rcpp::traits::input_parameter< unsigned int >::type mtry(mtrySEXP); + Rcpp::traits::input_parameter< unsigned int >::type num_trees(num_treesSEXP); + Rcpp::traits::input_parameter< unsigned int >::type min_node_size(min_node_sizeSEXP); + Rcpp::traits::input_parameter< double >::type sample_fraction(sample_fractionSEXP); + Rcpp::traits::input_parameter< bool >::type honesty(honestySEXP); + Rcpp::traits::input_parameter< double >::type honesty_fraction(honesty_fractionSEXP); + Rcpp::traits::input_parameter< bool >::type prune_empty_leaves(prune_empty_leavesSEXP); + Rcpp::traits::input_parameter< size_t >::type ci_group_size(ci_group_sizeSEXP); + Rcpp::traits::input_parameter< double >::type alpha(alphaSEXP); + Rcpp::traits::input_parameter< double >::type imbalance_penalty(imbalance_penaltySEXP); + Rcpp::traits::input_parameter< std::vector >::type clusters(clustersSEXP); + Rcpp::traits::input_parameter< unsigned int >::type samples_per_cluster(samples_per_clusterSEXP); + Rcpp::traits::input_parameter< bool >::type compute_oob_predictions(compute_oob_predictionsSEXP); + Rcpp::traits::input_parameter< unsigned int >::type num_threads(num_threadsSEXP); + Rcpp::traits::input_parameter< unsigned int >::type seed(seedSEXP); + rcpp_result_gen = Rcpp::wrap(regression_train(train_matrix, sparse_train_matrix, outcome_index, sample_weight_index, use_sample_weights, mtry, num_trees, min_node_size, sample_fraction, honesty, honesty_fraction, prune_empty_leaves, ci_group_size, alpha, imbalance_penalty, clusters, samples_per_cluster, compute_oob_predictions, num_threads, seed)); + return rcpp_result_gen; +END_RCPP +} +// regression_predict +Rcpp::List regression_predict(Rcpp::List forest_object, Rcpp::NumericMatrix train_matrix, Eigen::SparseMatrix sparse_train_matrix, size_t outcome_index, Rcpp::NumericMatrix test_matrix, Eigen::SparseMatrix sparse_test_matrix, unsigned int num_threads, unsigned int estimate_variance); +RcppExport SEXP _grf_regression_predict(SEXP 
forest_objectSEXP, SEXP train_matrixSEXP, SEXP sparse_train_matrixSEXP, SEXP outcome_indexSEXP, SEXP test_matrixSEXP, SEXP sparse_test_matrixSEXP, SEXP num_threadsSEXP, SEXP estimate_varianceSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< Rcpp::List >::type forest_object(forest_objectSEXP); + Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type train_matrix(train_matrixSEXP); + Rcpp::traits::input_parameter< Eigen::SparseMatrix >::type sparse_train_matrix(sparse_train_matrixSEXP); + Rcpp::traits::input_parameter< size_t >::type outcome_index(outcome_indexSEXP); + Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type test_matrix(test_matrixSEXP); + Rcpp::traits::input_parameter< Eigen::SparseMatrix >::type sparse_test_matrix(sparse_test_matrixSEXP); + Rcpp::traits::input_parameter< unsigned int >::type num_threads(num_threadsSEXP); + Rcpp::traits::input_parameter< unsigned int >::type estimate_variance(estimate_varianceSEXP); + rcpp_result_gen = Rcpp::wrap(regression_predict(forest_object, train_matrix, sparse_train_matrix, outcome_index, test_matrix, sparse_test_matrix, num_threads, estimate_variance)); + return rcpp_result_gen; +END_RCPP +} +// regression_predict_oob +Rcpp::List regression_predict_oob(Rcpp::List forest_object, Rcpp::NumericMatrix train_matrix, Eigen::SparseMatrix sparse_train_matrix, size_t outcome_index, unsigned int num_threads, bool estimate_variance); +RcppExport SEXP _grf_regression_predict_oob(SEXP forest_objectSEXP, SEXP train_matrixSEXP, SEXP sparse_train_matrixSEXP, SEXP outcome_indexSEXP, SEXP num_threadsSEXP, SEXP estimate_varianceSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< Rcpp::List >::type forest_object(forest_objectSEXP); + Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type train_matrix(train_matrixSEXP); + Rcpp::traits::input_parameter< Eigen::SparseMatrix >::type 
sparse_train_matrix(sparse_train_matrixSEXP); + Rcpp::traits::input_parameter< size_t >::type outcome_index(outcome_indexSEXP); + Rcpp::traits::input_parameter< unsigned int >::type num_threads(num_threadsSEXP); + Rcpp::traits::input_parameter< bool >::type estimate_variance(estimate_varianceSEXP); + rcpp_result_gen = Rcpp::wrap(regression_predict_oob(forest_object, train_matrix, sparse_train_matrix, outcome_index, num_threads, estimate_variance)); + return rcpp_result_gen; +END_RCPP +} +// ll_regression_predict +Rcpp::List ll_regression_predict(Rcpp::List forest, Rcpp::NumericMatrix train_matrix, Eigen::SparseMatrix sparse_train_matrix, size_t outcome_index, Rcpp::NumericMatrix test_matrix, Eigen::SparseMatrix sparse_test_matrix, std::vector lambdas, bool weight_penalty, std::vector linear_correction_variables, unsigned int num_threads, bool estimate_variance); +RcppExport SEXP _grf_ll_regression_predict(SEXP forestSEXP, SEXP train_matrixSEXP, SEXP sparse_train_matrixSEXP, SEXP outcome_indexSEXP, SEXP test_matrixSEXP, SEXP sparse_test_matrixSEXP, SEXP lambdasSEXP, SEXP weight_penaltySEXP, SEXP linear_correction_variablesSEXP, SEXP num_threadsSEXP, SEXP estimate_varianceSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< Rcpp::List >::type forest(forestSEXP); + Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type train_matrix(train_matrixSEXP); + Rcpp::traits::input_parameter< Eigen::SparseMatrix >::type sparse_train_matrix(sparse_train_matrixSEXP); + Rcpp::traits::input_parameter< size_t >::type outcome_index(outcome_indexSEXP); + Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type test_matrix(test_matrixSEXP); + Rcpp::traits::input_parameter< Eigen::SparseMatrix >::type sparse_test_matrix(sparse_test_matrixSEXP); + Rcpp::traits::input_parameter< std::vector >::type lambdas(lambdasSEXP); + Rcpp::traits::input_parameter< bool >::type weight_penalty(weight_penaltySEXP); + 
Rcpp::traits::input_parameter< std::vector >::type linear_correction_variables(linear_correction_variablesSEXP); + Rcpp::traits::input_parameter< unsigned int >::type num_threads(num_threadsSEXP); + Rcpp::traits::input_parameter< bool >::type estimate_variance(estimate_varianceSEXP); + rcpp_result_gen = Rcpp::wrap(ll_regression_predict(forest, train_matrix, sparse_train_matrix, outcome_index, test_matrix, sparse_test_matrix, lambdas, weight_penalty, linear_correction_variables, num_threads, estimate_variance)); + return rcpp_result_gen; +END_RCPP +} +// ll_regression_predict_oob +Rcpp::List ll_regression_predict_oob(Rcpp::List forest, Rcpp::NumericMatrix train_matrix, Eigen::SparseMatrix sparse_train_matrix, size_t outcome_index, std::vector lambdas, bool weight_penalty, std::vector linear_correction_variables, unsigned int num_threads, bool estimate_variance); +RcppExport SEXP _grf_ll_regression_predict_oob(SEXP forestSEXP, SEXP train_matrixSEXP, SEXP sparse_train_matrixSEXP, SEXP outcome_indexSEXP, SEXP lambdasSEXP, SEXP weight_penaltySEXP, SEXP linear_correction_variablesSEXP, SEXP num_threadsSEXP, SEXP estimate_varianceSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< Rcpp::List >::type forest(forestSEXP); + Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type train_matrix(train_matrixSEXP); + Rcpp::traits::input_parameter< Eigen::SparseMatrix >::type sparse_train_matrix(sparse_train_matrixSEXP); + Rcpp::traits::input_parameter< size_t >::type outcome_index(outcome_indexSEXP); + Rcpp::traits::input_parameter< std::vector >::type lambdas(lambdasSEXP); + Rcpp::traits::input_parameter< bool >::type weight_penalty(weight_penaltySEXP); + Rcpp::traits::input_parameter< std::vector >::type linear_correction_variables(linear_correction_variablesSEXP); + Rcpp::traits::input_parameter< unsigned int >::type num_threads(num_threadsSEXP); + Rcpp::traits::input_parameter< bool >::type 
estimate_variance(estimate_varianceSEXP); + rcpp_result_gen = Rcpp::wrap(ll_regression_predict_oob(forest, train_matrix, sparse_train_matrix, outcome_index, lambdas, weight_penalty, linear_correction_variables, num_threads, estimate_variance)); + return rcpp_result_gen; +END_RCPP +} + +static const R_CallMethodDef CallEntries[] = { + {"_grf_compute_split_frequencies", (DL_FUNC) &_grf_compute_split_frequencies, 2}, + {"_grf_compute_weights", (DL_FUNC) &_grf_compute_weights, 6}, + {"_grf_compute_weights_oob", (DL_FUNC) &_grf_compute_weights_oob, 4}, + {"_grf_deserialize_tree", (DL_FUNC) &_grf_deserialize_tree, 2}, + {"_grf_merge", (DL_FUNC) &_grf_merge, 1}, + {"_grf_causal_train", (DL_FUNC) &_grf_causal_train, 23}, + {"_grf_causal_predict", (DL_FUNC) &_grf_causal_predict, 9}, + {"_grf_causal_predict_oob", (DL_FUNC) &_grf_causal_predict_oob, 7}, + {"_grf_ll_causal_predict", (DL_FUNC) &_grf_ll_causal_predict, 12}, + {"_grf_ll_causal_predict_oob", (DL_FUNC) &_grf_ll_causal_predict_oob, 10}, + {"_grf_custom_train", (DL_FUNC) &_grf_custom_train, 18}, + {"_grf_custom_predict", (DL_FUNC) &_grf_custom_predict, 7}, + {"_grf_custom_predict_oob", (DL_FUNC) &_grf_custom_predict_oob, 5}, + {"_grf_instrumental_train", (DL_FUNC) &_grf_instrumental_train, 24}, + {"_grf_instrumental_predict", (DL_FUNC) &_grf_instrumental_predict, 10}, + {"_grf_instrumental_predict_oob", (DL_FUNC) &_grf_instrumental_predict_oob, 8}, + {"_grf_quantile_train", (DL_FUNC) &_grf_quantile_train, 19}, + {"_grf_quantile_predict", (DL_FUNC) &_grf_quantile_predict, 8}, + {"_grf_quantile_predict_oob", (DL_FUNC) &_grf_quantile_predict_oob, 6}, + {"_grf_regression_train", (DL_FUNC) &_grf_regression_train, 20}, + {"_grf_regression_predict", (DL_FUNC) &_grf_regression_predict, 8}, + {"_grf_regression_predict_oob", (DL_FUNC) &_grf_regression_predict_oob, 6}, + {"_grf_ll_regression_predict", (DL_FUNC) &_grf_ll_regression_predict, 11}, + {"_grf_ll_regression_predict_oob", (DL_FUNC) &_grf_ll_regression_predict_oob, 9}, 
+ {NULL, NULL, 0} +}; + +RcppExport void R_init_grf(DllInfo *dll) { + R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); + R_useDynamicSymbols(dll, FALSE); +} From 3f75a679d458e50a39807fc60744a248814ce3d9 Mon Sep 17 00:00:00 2001 From: Julie Tibshirani Date: Tue, 20 Aug 2019 23:47:14 -0700 Subject: [PATCH 2/5] Ensure quantile forests give consistent estimates across platforms. (#492) We now include a tie-breaker while sorting the list of sample IDs and outcomes. Without the tie-breaker, the sort could produce a different element ordering across different runs of the algorithm. --- .../prediction/QuantilePredictionStrategy.cpp | 6 +++++- .../resources/quantile_oob_predictions.csv | 18 +++++++++--------- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/core/src/prediction/QuantilePredictionStrategy.cpp b/core/src/prediction/QuantilePredictionStrategy.cpp index 511126495..d9556bb90 100644 --- a/core/src/prediction/QuantilePredictionStrategy.cpp +++ b/core/src/prediction/QuantilePredictionStrategy.cpp @@ -52,7 +52,11 @@ std::vector QuantilePredictionStrategy::compute_quantile_cutoffs( std::sort(samples_and_values.begin(), samples_and_values.end(), [](std::pair first_pair, std::pair second_pair) { - return first_pair.second < second_pair.second; + // Note: we add a tie-breaker here to ensure that this sort consistently produces the + // same element ordering. Otherwise, different runs of the algorithm could result in + // different quantile predictions on the same data. 
+ return first_pair.second < second_pair.second + || (first_pair.second == second_pair.second && first_pair.first < second_pair.first); }); std::vector quantile_cutoffs; diff --git a/core/test/forest/resources/quantile_oob_predictions.csv b/core/test/forest/resources/quantile_oob_predictions.csv index da10cfe6b..534c7d97f 100644 --- a/core/test/forest/resources/quantile_oob_predictions.csv +++ b/core/test/forest/resources/quantile_oob_predictions.csv @@ -121,7 +121,7 @@ -10, -10, 10 -10, -10, 10 -1, -1, 1 --1, -1, 1 +-1, 1, 1 -1, -1, 1 10, 10, 10 -10, -10, -10 @@ -410,7 +410,7 @@ -1, -1, 1 -1, -1, 1 -1, -1, 1 --10, -10, 10 +-10, 10, 10 -1, -1, 1 10, 10, 10 -1, -1, 1 @@ -421,7 +421,7 @@ -10, 10, 10 -1, -1, 1 -1, -1, 1 --1, -1, 1 +-1, 1, 1 -1, -1, 1 -1, 1, 1 -10, 10, 10 @@ -459,7 +459,7 @@ -1, -1, 1 -10, 10, 10 -10, -10, 10 --10, 10, 10 +-10, -10, 10 -1, -1, 1 -1, -1, 1 -10, -1, 10 @@ -537,7 +537,7 @@ -10, 10, 10 -1, 1, 1 -1, -1, 1 --10, 10, 10 +-10, -10, 10 -10, -10, 10 -1, 1, 1 -1, 1, 1 @@ -589,7 +589,7 @@ -1, 1, 1 -1, -1, 1 -10, 10, 10 --1, -1, 1 +-1, 1, 1 -1, 1, 1 -1, -1, 1 -10, -10, 10 @@ -832,7 +832,7 @@ -10, -10, 10 -1, 1, 1 -10, -10, -10 --1, 1, 1 +-1, -1, 1 -1, 1, 1 -1, -1, 1 -10, -10, 10 @@ -847,7 +847,7 @@ -1, -1, 1 -1, -1, 1 -1, -1, 1 --10, -10, 10 +-10, 10, 10 -10, -10, 10 -10, 10, 10 -1, -1, 1 @@ -857,7 +857,7 @@ 10, 10, 10 -10, -10, 1 -10, 10, 10 --10, -1, 10 +-10, -10, 10 -1, -1, -1 -1, 1, 1 -1, 1, 1 From 9b7cff99d69fad8f47a61692193c3ea2397da4b6 Mon Sep 17 00:00:00 2001 From: Stefan Wager Date: Tue, 20 Aug 2019 23:48:13 -0700 Subject: [PATCH 3/5] fix typo (#491) --- r-package/grf/R/average_treatment_effect.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r-package/grf/R/average_treatment_effect.R b/r-package/grf/R/average_treatment_effect.R index 236b7e057..79db57120 100644 --- a/r-package/grf/R/average_treatment_effect.R +++ b/r-package/grf/R/average_treatment_effect.R @@ -95,7 +95,7 @@ average_treatment_effect <- function(forest, 
clusters <- if (cluster.se) { forest$clusters } else { - 1:length(forest$Y) + 1:length(forest$Y.orig) } observation.weight <- observation_weights(forest) From 5d79dd04364371d040f2d914fdcfd2c35b96d8c4 Mon Sep 17 00:00:00 2001 From: Julie Tibshirani Date: Wed, 21 Aug 2019 00:20:16 -0700 Subject: [PATCH 4/5] Run the characterization tests in CI. (#485) Previously, the C++ characterization tests only passed when using clang because of differences in the way random numbers are generated across platforms. Because we build on a couple different platforms, we had to disable these tests in CI. Now that we've added platform-independent random number generation in #469 and #492, we can enable the characterization tests. --- .travis.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index fb5d43fec..e13bd54bb 100644 --- a/.travis.yml +++ b/.travis.yml @@ -33,7 +33,7 @@ matrix: - mkdir build && cd build - cmake -DCMAKE_CXX_COMPILER=$COMPILER .. && make - cd .. - - valgrind --leak-check=full --error-exitcode=1 ./build/grf exclude:[characterization] + - valgrind --leak-check=full --error-exitcode=1 ./build/grf - name: "core grf C++: clang" compiler: clang @@ -59,7 +59,7 @@ matrix: - mkdir build && cd build - cmake -DCMAKE_CXX_COMPILER=$COMPILER .. && make - cd .. 
- - valgrind --leak-check=full --error-exitcode=1 ./build/grf exclude:[characterization] + - valgrind --leak-check=full --error-exitcode=1 ./build/grf - name: "grf R package" before_install: From 8e5a2a46ec246c8d3e3c76f5d89bfb6ca9093283 Mon Sep 17 00:00:00 2001 From: Rina Friedberg Date: Wed, 21 Aug 2019 15:23:51 -0700 Subject: [PATCH 5/5] Update boosting and bart implementations in simulations (#487) * updated boosting and bart implementations in simulations * added wages simulation code * edited wages code for clarity * added wages call in main file * udpated wages code to reflect new train test split * added explicit rlearner call for readability --- .../local_linear_examples/bias_image.R | 4 +- .../boundary_bias_table.R | 38 +----- .../local_linear_examples/friedman_table.R | 38 +----- experiments/local_linear_examples/main.R | 1 + experiments/local_linear_examples/wages.R | 116 ++++++++++++++++++ 5 files changed, 129 insertions(+), 68 deletions(-) create mode 100644 experiments/local_linear_examples/wages.R diff --git a/experiments/local_linear_examples/bias_image.R b/experiments/local_linear_examples/bias_image.R index 6dc6cc191..db61ac99e 100644 --- a/experiments/local_linear_examples/bias_image.R +++ b/experiments/local_linear_examples/bias_image.R @@ -17,9 +17,7 @@ ll.forest = local_linear_forest(X, Y) # lasso to select local linear correction variables lasso.mod = cv.glmnet(X, Y, alpha=1) -selected = which(coef(lasso.mod) != 0) -# remove intercept and adjust indexes correspondingly -selected = selected[2:length(selected)] - 1 +selected = as.numeric(predict(lasso.mod, type = "nonzero")) preds.llf = predict(ll.forest, linear.correction.variables = selected, tune.lambda = TRUE)$predictions ticks = seq(-1, 1, length = 2000) diff --git a/experiments/local_linear_examples/boundary_bias_table.R b/experiments/local_linear_examples/boundary_bias_table.R index 9b681fa34..db3e940d7 100644 --- a/experiments/local_linear_examples/boundary_bias_table.R +++ 
b/experiments/local_linear_examples/boundary_bias_table.R @@ -2,6 +2,7 @@ library(grf) library(glmnet) library(BART) library(xgboost) +library(rlearner) set.seed(1234) @@ -9,27 +10,6 @@ mu = function(x){ log(1+exp(6*x[1])) } -boosting.cv = function(dtrain, Y){ - - # cross-validate on a reasonable grid - etas = seq(0, 0.3, by = 0.1) - nrounds = c(50, 100, 500, 1000) - max_depth = seq(2, 8, by = 2) - - args.cv = expand.grid(e = etas, n = nrounds, d = max_depth) - results = t(apply(args.cv, MARGIN = 1, FUN = function(arguments){ - model.xgb = xgboost(dtrain, nrounds = arguments[2], - params = list(objective = "reg:linear"), - eval_metric = "rmse", - eta = arguments[1], - max_depth = arguments[3]) - xgb.preds = predict(model.xgb, newdata = dtrain) - mean((xgb.preds - Y)**2) - })) - best.index = which.min(results) - return(args.cv[best.index,]) -} - simulation.run = function(n, p, sigma, num.reps = 100, ntest = 2000){ errors = replicate(num.reps, { X = matrix(runif(n*p, -1, 1), nrow = n) @@ -57,7 +37,7 @@ simulation.run = function(n, p, sigma, num.reps = 100, ntest = 2000){ # use lasso to select linear correction variables lasso.mod = cv.glmnet(X, Y, alpha = 1) - lasso.coef = predict(lasso.mod, type = "nonzero") + lasso.coef = as.numeric(predict(lasso.mod, type = "nonzero")) if(!is.null(dim(lasso.coef))){ selected = lasso.coef[,1] } else { @@ -86,20 +66,12 @@ simulation.run = function(n, p, sigma, num.reps = 100, ntest = 2000){ lasso.rf.preds = predict(rf, newdata = X.test)$predictions + predict(lasso.mod, newx = X.test, s = "lambda.min") lasso.rf.mse = mean((lasso.rf.preds-truth)**2) - bart.mod = wbart(X, Y, X.test, nskip = 5, ndpost = 5) + bart.mod = wbart(X, Y, X.test) bart.preds = bart.mod$yhat.test.mean bart.mse = mean((bart.preds-truth)**2) - dtrain = xgb.DMatrix(X, label = Y) - cv.variables = boosting.cv(dtrain, Y) - cv.variables = as.numeric(cv.variables) - eta = cv.variables[1] - nrounds = cv.variables[2] - max_depth = cv.variables[3] - - model.xgb = 
xgboost(dtrain, nrounds = nrounds, params = list(objective = "reg:linear"), - eval_metric = "rmse", eta = eta, max_depth = max_depth) - xgb.preds = predict(model.xgb, newdata = X.test) + boost.cv.fit = rlearner::cvboost(as.matrix(X), Y) + xgb.preds = predict(boost.cv.fit, as.matrix(X.test)) xg.mse = mean((xgb.preds - truth)**2) c(llf.stepwise.mse, llf.oracle, rf.mse, rf.adapt.mse, lasso.rf.mse, bart.mse, xg.mse, llf.lasso.mse) diff --git a/experiments/local_linear_examples/friedman_table.R b/experiments/local_linear_examples/friedman_table.R index 5ba3f9913..5d62c69cd 100644 --- a/experiments/local_linear_examples/friedman_table.R +++ b/experiments/local_linear_examples/friedman_table.R @@ -2,6 +2,7 @@ library(grf) library(glmnet) library(BART) library(xgboost) +library(rlearner) set.seed(1234) @@ -9,27 +10,6 @@ ff = function(x){ return(10*sin(pi*x[1]*x[2]) + 20*((x[3] - 0.5)**2) + 10*x[4] + 5*x[5]) } -boosting.cv = function(dtrain, Y){ - - # cross-validate on a reasonable grid - etas = seq(0, 0.3, by = 0.1) - nrounds = c(50, 100, 500, 1000) - max_depth = seq(2, 8, by = 2) - - args.cv = expand.grid(e = etas, n = nrounds, d = max_depth) - results = t(apply(args.cv, MARGIN = 1, FUN = function(arguments){ - model.xgb = xgboost(dtrain, nrounds = arguments[2], - params = list(objective = "reg:linear"), - eval_metric = "rmse", - eta = arguments[1], - max_depth = arguments[3]) - xgb.preds = predict(model.xgb, newdata = dtrain) - mean((xgb.preds - Y)**2) - })) - best.index = which.min(results) - return(args.cv[best.index,]) -} - simulation.run = function(n, p, sigma, num.reps = 100, ntest = 2000, num.trees = 2000){ errors = replicate(num.reps, { X = matrix(runif(n*p,0,1), nrow = n) @@ -58,7 +38,8 @@ simulation.run = function(n, p, sigma, num.reps = 100, ntest = 2000, num.trees = # use lasso to select linear correction variables lasso.mod = cv.glmnet(X, Y, alpha = 1) - lasso.coef = predict(lasso.mod, type = "nonzero") + lasso.coef = as.numeric(predict(lasso.mod, type = 
"nonzero")) + # if lasso chose no variables, use all for LL correction if(!is.null(dim(lasso.coef))){ selected = lasso.coef[,1] } else { @@ -87,19 +68,12 @@ simulation.run = function(n, p, sigma, num.reps = 100, ntest = 2000, num.trees = lasso.rf.preds = predict(rf, newdata = X.test)$predictions + predict(lasso.mod, newx = X.test, s = "lambda.min") lasso.rf.mse = mean((lasso.rf.preds-truth)**2) - bart.mod = wbart(X, Y, X.test, nskip = 5, ndpost = 5) + bart.mod = wbart(X, Y, X.test) bart.preds = bart.mod$yhat.test.mean bart.mse = mean((bart.preds-truth)**2) - dtrain = xgb.DMatrix(X, label = Y) - cv.variables = as.numeric(boosting.cv(dtrain, Y)) - eta = cv.variables[1] - nrounds = cv.variables[2] - max_depth = cv.variables[3] - - model.xgb = xgboost(dtrain, nrounds = nrounds, params = list(objective = "reg:linear"), - eval_metric = "rmse", eta = eta, max_depth = max_depth) - xgb.preds = predict(model.xgb, newdata = X.test) + boost.cv.fit = rlearner::cvboost(as.matrix(X), Y) + xgb.preds = predict(boost.cv.fit, as.matrix(X.test)) xg.mse = mean((xgb.preds - truth)**2) c(llf.stepwise.mse, llf.oracle, rf.mse, rf.adapt.mse, lasso.rf.mse, bart.mse, xg.mse, llf.lasso.mse) diff --git a/experiments/local_linear_examples/main.R b/experiments/local_linear_examples/main.R index 5ec784c0f..63354f524 100644 --- a/experiments/local_linear_examples/main.R +++ b/experiments/local_linear_examples/main.R @@ -10,3 +10,4 @@ source("friedman_table.R") source("confidence.R") source("causal_table.R") source("boundary_bias_table.R") +source("wages.R") \ No newline at end of file diff --git a/experiments/local_linear_examples/wages.R b/experiments/local_linear_examples/wages.R new file mode 100644 index 000000000..a15e4ed3f --- /dev/null +++ b/experiments/local_linear_examples/wages.R @@ -0,0 +1,116 @@ +rm(list = ls()) +set.seed(123) + +library(grf) +library(glmnet) +library(BART) +library(xgboost) +library(ggplot2) +library(dplyr) +library(splines) +library(rlearner) + +# load the data 
+load("cps1976_2018.RData") +data = data.frame(cps1976_2018) + +# clean data: remove NA and missing outcomes +data = data[!is.na(data$incwage),] +data = data[data$incwage != 9999999,] # remove missing +data = data[data$incwage != 9999998,] # remove missing + +# extract 2018 data +data = data[data$year == 2018,] + +# add age^2, educ^2 covariates +data$agesq = data$age**2 +data$educsq = data$educ**2 + +covariates = c("age", "agesq", "educ", "educsq", "occ2010", "occ10ly", "sex", "race", + "marst", "labforce", "ind1950", "classwkr", "wkstat", "uhrswork1", "metro", "famsize") + +continuous.covariates = which(covariates %in% c("age", "educ", "agesq", "educsq", "uhrswork1", "famsize")) +outcome = "incwage" + +data = data[,c(covariates, outcome)] +data = data[complete.cases(data),] + +# transform outcome (standard for wage regressions) +data$incwage = log(data$incwage + 1) + +###################### +## ERROR EVALUATION ## +###################### + +num.reps = 100 +size.test = 10000 +sample.sizes = c(2000, 5000, 10000, 50000) + +mse.sample.sizes = data.frame(t(sapply(sample.sizes, function(size){ + index.train = sample(1:nrow(data), size = size, replace = FALSE) + + X = data[index.train, covariates] + Y = data$incwage[index.train] + + X = X[complete.cases(X),] + Y = Y[complete.cases(X)] + + results = data.frame(t(sapply(1:num.reps, function(i){ + print(i) + + index.test = sample((1:nrow(data))[-index.train], size = size.test, replace = FALSE) + + X.test = data[index.test, covariates] + truth = data$incwage[index.test] + + X.test = X.test[complete.cases(X.test),] + truth = truth[complete.cases(X.test)] + + forest = regression_forest(as.matrix(X), Y, honesty = TRUE, tune.parameters = TRUE) + + ll.lambda = tune_ll_regression_forest(forest, linear.correction.variables = continuous.covariates, + ll.weight.penalty = T)$lambda.min + llf.preds = predict(forest, as.matrix(X.test), + linear.correction.variables = continuous.covariates, + ll.lambda = ll.lambda, + ll.weight.penalty = 
T)$predictions + llf.mse = mean((llf.preds - truth)**2) + + rf.preds = predict(forest, as.matrix(X.test))$predictions + rf.mse = mean((rf.preds - truth)**2) + + ols.form = as.formula(paste("Y", paste(covariates, collapse = "+"), sep = "~")) + dd.ols = cbind(Y, X) + ols.fit = lm(ols.form, dd.ols) + ols.preds = predict(ols.fit, X.test) + ols.mse = mean((ols.preds - truth)**2) + + mm = model.matrix( ~.^2, data = X) + lasso.mod = cv.glmnet(mm, Y, alpha = 1) + mmtest = model.matrix( ~.^2, data = X.test) + lasso.preds = predict(lasso.mod, newx = mmtest, lambda= lasso.mod$lambda.min) + lasso.mse = mean((lasso.preds - truth)**2) + + bart.mod = wbart(X, Y, X.test) + bart.preds = bart.mod$yhat.test.mean + bart.mse = mean((bart.preds-truth)**2) + + boost.cv.fit = rlearner::cvboost(as.matrix(X), Y) + xgb.preds = predict(boost.cv.fit, as.matrix(X.test)) + xg.mse = mean((xgb.preds - truth)**2) + + return(c(ols.mse, llf.mse, rf.mse, lasso.mse, xg.mse, bart.mse)) + }))) + + mses = colMeans(results) + sds = apply(results, MARGIN = 2, FUN = sd) + + as.numeric(c(mses, sds)) +}))) + +colnames(mse.sample.sizes) = c("OLS", "LLF", "RF", "Lasso", "XG", "BART", + "OLS.sd", "LLF.sd", "RF.sd", "Lasso.sd", "XG.sd", "BART.sd") + +mse.sample.sizes$size = sample.sizes + +write.csv(mse.sample.sizes,"wages_sample_sizes.csv", row.names = FALSE) \ No newline at end of file