From d0b4024b26cc777f455f30cd3b5affab5b536ebc Mon Sep 17 00:00:00 2001
From: Patrick Breheny
Date: Thu, 18 May 2017 09:10:47 -0500
Subject: [PATCH] Version 3.1-0

---
 DESCRIPTION                                   |  2 +-
 NEWS                                          |  7 ++--
 inst/tests/agreement.R                        |  3 --
 inst/tests/basic-functionality.R              |  2 -
 inst/tests/coerce.R                           |  1 -
 inst/tests/extra-features.R                   |  2 -
 inst/tests/grpsurv.R                          |  2 -
 inst/tests/multitask.R                        |  1 -
 .../tests/standardization-orthogonalization.R |  1 -
 man/Lung.Rd                                   |  2 +-
 man/grpreg.Rd                                 | 37 ++++++++++---------
 man/grpsurv.Rd                                |  4 +-
 src/gdfit_gaussian.c                          |  4 --
 13 files changed, 27 insertions(+), 41 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 459d9b8..d12a312 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: grpreg
 Title: Regularization Paths for Regression Models with Grouped Covariates
 Version: 3.1-0
-Date: 2017-01-15
+Date: 2017-05-18
 Author: Patrick Breheny [aut, cre], Yaohui Zeng [ctb]
 Maintainer: Patrick Breheny
 Depends: R (>= 3.1.0), Matrix
diff --git a/NEWS b/NEWS
index 5a2fa3b..f7b7fa2 100644
--- a/NEWS
+++ b/NEWS
@@ -1,16 +1,17 @@
-3.1-0 ()
+3.1-0 (2017-05-18)
   * New: Additional tests and support for coersion of various types with
     respect to both X and y
-  * Internal: new SSR-BEDPP feature screening rule for group lasso
   * Change: Convergence criterion now based on RMSD of linear predictors
   * Change: 'Lung' and 'Birthwt' data sets now use factor representation
     of group, as character vectors are inherently ambiguous with respect
     to order
   * Change: max.iter now based on total number of iterations for entire path
   * Internal: 'X', 'group', and 'group.multiplier' now bundled together in
     an object called 'XG' to enforce agreement at all times
-  * Fixed:
+  * Internal: new SSR-BEDPP feature screening rule for group lasso
   * Internal: Registration of native routines
   * Internal: Changing PROTECT/UNPROTECT to conform to new coding standards
+  * Fixed: The binding of X and G fixes several potential bugs, including
+    Issue #12 (GitHub)

 3.0-2
   * Fixed bug involving mismatch between group.multiplier and group if
diff --git a/inst/tests/agreement.R b/inst/tests/agreement.R
index 2bf2ff9..ff28b72 100644
--- a/inst/tests/agreement.R
+++ b/inst/tests/agreement.R
@@ -1,6 +1,3 @@
-##source("~/dev/.grpreg.setup.R")
-#require(grpreg)
-
 #################################
 .test = "gel reproduces lasso" ##
 #################################
diff --git a/inst/tests/basic-functionality.R b/inst/tests/basic-functionality.R
index 59367c5..2f48f88 100644
--- a/inst/tests/basic-functionality.R
+++ b/inst/tests/basic-functionality.R
@@ -1,5 +1,3 @@
-#source("~/dev/.grpreg.setup.R")
-
 .test = "grpreg() reproduces simple linear regression"
 n <- 5
 p <- 1
diff --git a/inst/tests/coerce.R b/inst/tests/coerce.R
index 457900a..5fe0609 100644
--- a/inst/tests/coerce.R
+++ b/inst/tests/coerce.R
@@ -29,4 +29,3 @@ fit3 <- grpreg(X, y, group=g3, family="binomial")
 check(coef(fit1, which=50), coef(fit2, which=50), tol=0.001)
 check(coef(fit2, which=50), coef(fit3, which=50), tol=0.001)
 check(coef(fit1, which=50), coef(fit3, which=50), tol=0.001)
-# check(names(coef(fit1))=="TEST ME") # Do I have good tests for constant columns?
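The coerce.R hunk above exercises the coercion support described in the NEWS entry: different codings of the group argument should give the same fit. A minimal sketch of that idea, assuming only the grpreg(X, y, group, family) interface documented in man/grpreg.Rd; the data, group vectors, and object names below are illustrative and are not taken from the test file:

library(grpreg)
set.seed(1)
n <- 100; p <- 6
X <- matrix(rnorm(n * p), n, p)
y <- rbinom(n, 1, 0.5)
g_int <- rep(1:3, each = 2)  # groups coded as an integer vector
g_fct <- factor(g_int)       # groups coded as a factor (cf. the 'Lung'/'Birthwt' change)
fit_int <- grpreg(X, y, group = g_int, family = "binomial")
fit_fct <- grpreg(X, y, group = g_fct, family = "binomial")
# The two codings should agree along the path, much as the coerce.R checks
# compare fits via coef(..., which = 50)
all.equal(coef(fit_int, which = 50), coef(fit_fct, which = 50), tolerance = 0.001)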
diff --git a/inst/tests/extra-features.R b/inst/tests/extra-features.R
index 03e4669..b81fe1f 100755
--- a/inst/tests/extra-features.R
+++ b/inst/tests/extra-features.R
@@ -1,5 +1,3 @@
-set.seed(1)
-
 .test = "logLik is correct"
 n <- 50
 group <- rep(0:4,5:1)
diff --git a/inst/tests/grpsurv.R b/inst/tests/grpsurv.R
index 0f8a670..ef7c34a 100644
--- a/inst/tests/grpsurv.R
+++ b/inst/tests/grpsurv.R
@@ -1,5 +1,3 @@
-set.seed(1)
-
 .test = "grpsurv works"
 y <- survival::Surv(rexp(50), sample(rep(0:1, c(10,40))))
 X <- matrix(rnorm(50*6), 50, 6)
diff --git a/inst/tests/multitask.R b/inst/tests/multitask.R
index d8143aa..0672137 100644
--- a/inst/tests/multitask.R
+++ b/inst/tests/multitask.R
@@ -1,4 +1,3 @@
-set.seed(1)
 n <- 100
 p <- 5

diff --git a/inst/tests/standardization-orthogonalization.R b/inst/tests/standardization-orthogonalization.R
index d4ddef2..8ea7cc1 100644
--- a/inst/tests/standardization-orthogonalization.R
+++ b/inst/tests/standardization-orthogonalization.R
@@ -1,4 +1,3 @@
-set.seed(1)
 n <- 20
 p <- 5
 l <- 5
diff --git a/man/Lung.Rd b/man/Lung.Rd
index 6330298..2a79a75 100644
--- a/man/Lung.Rd
+++ b/man/Lung.Rd
@@ -38,7 +38,7 @@ data(Lung)
     0.}
   }
 }
-\source{\url{http://CRAN.R-project.org/package=survival}}
+\source{\url{https://cran.r-project.org/package=survival}}
 \references{
   \itemize{
     \item Kalbfleisch D and Prentice RL (1980), \emph{The Statistical
diff --git a/man/grpreg.Rd b/man/grpreg.Rd
index a7b3a8c..7e07c24 100644
--- a/man/grpreg.Rd
+++ b/man/grpreg.Rd
@@ -36,10 +36,11 @@ tau = 1/3, group.multiplier, warn=TRUE, returnX = FALSE, ...)
   computes a grid of lambda values that ranges uniformly on the log
   scale over the relevant range of lambda values.}
   \item{lambda.min}{The smallest value for \code{lambda}, as a fraction
-    of \code{lambda.max}. Default is .0001 if the number of observations
-    is larger than the number of covariates and .05 otherwise.}
-  \item{log.lambda}{Whether compute the grid values of lambda on log scale (default)
-    or linear scale.}
+    of \code{lambda.max}. Default is .0001 if the number of
+    observations is larger than the number of covariates and .05
+    otherwise.}
+  \item{log.lambda}{Whether compute the grid values of lambda on log
+    scale (default) or linear scale.}
   \item{alpha}{\code{grpreg} allows for both a group penalty and an L2
     (ridge) penalty; \code{alpha} controls the proportional weight of
     the regularization parameters of these two penalties. The group
@@ -47,8 +48,8 @@ tau = 1/3, group.multiplier, warn=TRUE, returnX = FALSE, ...)
     the regularization parameter of the ridge penalty is
     \code{lambda*(1-alpha)}. Default is 1: no ridge penalty.}
   \item{eps}{Convergence threshhold. The algorithm iterates until the
-    change (on the standardized scale) in any coefficient is less than
-    \code{eps}. Default is \code{.001}. See details.}
+    RMSD for the change in linear predictors for each coefficient is
+    less than \code{eps}. Default is \code{1e-4}. See details.}
   \item{max.iter}{Maximum number of iterations (total across entire
     path). Default is 10000. See details.}
   \item{dfmax}{Limit on the number of parameters allowed to be nonzero.
@@ -76,16 +77,16 @@ tau = 1/3, group.multiplier, warn=TRUE, returnX = FALSE, ...)
 \details{
   There are two general classes of methods involving grouped penalties:
   those that carry out bi-level selection and those that carry out group
-  selection. Bi-level means carrying out variable selection at the group
-  level as well as the level of individual covariates (i.e., selecting
-  important groups as well as important members of those groups). Group
-  selection selects important groups, and not members within the group --
-  i.e., within a group, coefficients will either all be zero or all
-  nonzero. The \code{grLasso}, \code{grMCP}, and \code{grSCAD}
-  penalties carry out group selection, while the \code{gel} and
-  \code{cMCP} penalties carry out bi-level selection. For bi-level
-  selection, see also the \code{\link{gBridge}} function. For
-  historical reasons and backwards compatibility, some of these
+  selection. Bi-level means carrying out variable selection at the
+  group level as well as the level of individual covariates (i.e.,
+  selecting important groups as well as important members of those
+  groups). Group selection selects important groups, and not members
+  within the group -- i.e., within a group, coefficients will either all
+  be zero or all nonzero. The \code{grLasso}, \code{grMCP}, and
+  \code{grSCAD} penalties carry out group selection, while the
+  \code{gel} and \code{cMCP} penalties carry out bi-level selection.
+  For bi-level selection, see also the \code{\link{gBridge}} function.
+  For historical reasons and backwards compatibility, some of these
   penalties have aliases; e.g., \code{gLasso} will do the same thing as
   \code{grLasso}, but users are encouraged to use \code{grLasso}.

@@ -166,8 +167,8 @@ tau = 1/3, group.multiplier, warn=TRUE, returnX = FALSE, ...)
   \item{lambda}{The sequence of \code{lambda} values in the path.}
   \item{alpha}{Same as above.}
   \item{loss}{A vector containing either the residual sum of squares
-    (\code{"gaussian"}) or negative log-likelihood (\code{"binomial"}) of
-    the fitted model at each value of \code{lambda}.}
+    (\code{"gaussian"}) or negative log-likelihood (\code{"binomial"})
+    of the fitted model at each value of \code{lambda}.}
   \item{n}{Number of observations.}
   \item{penalty}{Same as above.}
   \item{df}{A vector of length \code{nlambda} containing estimates of
diff --git a/man/grpsurv.Rd b/man/grpsurv.Rd
index eefcb21..d9de931 100644
--- a/man/grpsurv.Rd
+++ b/man/grpsurv.Rd
@@ -44,8 +44,8 @@ group.multiplier, warn=TRUE, returnX=FALSE, ...)
   a sequence of values of length \code{nlambda} is computed
   automatically, equally spaced on the log scale.}
   \item{eps}{Convergence threshhold. The algorithm iterates until the
-    relative change in any coefficient is less than \code{eps}. Default
-    is \code{.001}.}
+    RMSD for the change in linear predictors for each coefficient is
+    less than \code{eps}. Default is \code{0.001}.}
   \item{max.iter}{Maximum number of iterations (total across entire
     path). Default is 10000.}
   \item{dfmax}{Limit on the number of parameters allowed to be nonzero.
diff --git a/src/gdfit_gaussian.c b/src/gdfit_gaussian.c
index a5a0163..908f2ce 100644
--- a/src/gdfit_gaussian.c
+++ b/src/gdfit_gaussian.c
@@ -176,10 +176,6 @@ void bedpp_glasso(int *e3, double *yTxxTv1, double *xTv1_sq, double *xTy_sq,
       e3[g] = 0; // reject
     }

-    // debug
-    // Rprintf("\t K[%d]: %d;\t Reject: %d;\t LHS_temp[%d]: %f;\t LHS[%d]: %f;\t RHS[%d]: %f;\t yTxxTv1[%d]: %f;\t xTv1_sq[%d]: %f;\t xTy_sq[%d]: %f\n",
-    //         g, K[g], e3[g], g, LHS_temp, g, LHS, g, RHS, g, yTxxTv1[g], g, xTv1_sq[g], g, xTy_sq[g]);
-
   }
 }
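The man/grpreg.Rd hunks above distinguish group selection (grLasso, grMCP, grSCAD) from bi-level selection (gel, cMCP) and document the revised eps and max.iter semantics. A minimal sketch of how those options fit together, assuming the Birthwt example data shipped with grpreg (with X, bwt, and the factor-coded group component mentioned in the NEWS entry); object names are illustrative and not part of the patch:

library(grpreg)
data(Birthwt)
X <- Birthwt$X          # design matrix with grouped covariates (assumed layout)
y <- Birthwt$bwt        # continuous response (birth weight)
group <- Birthwt$group  # factor-coded group labels

# Group selection: a selected group enters with all of its coefficients nonzero
fit_grp <- grpreg(X, y, group, penalty = "grLasso", eps = 1e-4, max.iter = 10000)

# Bi-level selection: individual members of a selected group may still be zero
fit_bil <- grpreg(X, y, group, penalty = "cMCP", eps = 1e-4, max.iter = 10000)

# Compare the sparsity patterns partway along the two paths
coef(fit_grp, which = 50)
coef(fit_bil, which = 50)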