Skip to content

Commit

Permalink
Standardization w/ reordering issue fixed and more rigorously tested …
Browse files Browse the repository at this point in the history
…for future releases; Fixes #17
  • Loading branch information
pbreheny committed Jun 15, 2018
1 parent 2732cce commit 776f1c1
Show file tree
Hide file tree
Showing 7 changed files with 32 additions and 84 deletions.
23 changes: 8 additions & 15 deletions R/gBridge.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,14 @@ gBridge <- function(X, y, group=1:ncol(X), family=c("gaussian","binomial","poiss
XG <- newXG(X, group, group.multiplier, m, TRUE)
if (nrow(XG$X) != length(yy)) stop("X and y do not have the same number of observations")

## Set up lambda
# Set up lambda
if (missing(lambda)) {
lambda <- setupLambda.gBridge(XG$X, yy, XG$g, family, alpha, lambda.min, lambda.max, nlambda, gamma, XG$m)
} else {
nlambda <- length(lambda)
}

## Fit
# Fit
n <- length(yy)
p <- ncol(XG$X)
K <- as.numeric(table(XG$g))
Expand All @@ -31,7 +31,7 @@ gBridge <- function(X, y, group=1:ncol(X), family=c("gaussian","binomial","poiss
fit <- .Call("lcdfit_gaussian", XG$X, yy, "gBridge", K1, K0, lambda, alpha, eps, delta, gamma, 0, as.integer(max.iter), as.double(XG$m), as.integer(p), as.integer(max(XG$g)), as.integer(TRUE))
b <- rbind(mean(y), matrix(fit[[1]], nrow=p))
iter <- fit[[2]]
df <- fit[[3]] + 1 ## Intercept
df <- fit[[3]] + 1 # Intercept
loss <- fit[[4]]
} else {
if (family=="binomial") {
Expand All @@ -45,7 +45,7 @@ gBridge <- function(X, y, group=1:ncol(X), family=c("gaussian","binomial","poiss
loss <- fit[[5]]
}

## Eliminate saturated lambda values, if any
# Eliminate saturated lambda values, if any
ind <- !is.na(iter)
b <- b[, ind, drop=FALSE]
iter <- iter[ind]
Expand All @@ -54,18 +54,11 @@ gBridge <- function(X, y, group=1:ncol(X), family=c("gaussian","binomial","poiss
loss <- loss[ind]
if (warn & any(iter==max.iter)) warning("Algorithm failed to converge for all values of lambda")

## Unstandardize
b <- unstandardize(b, XG$center, XG$scale)
beta <- matrix(0, nrow=m*(ncol(X)+1), ncol=length(lambda))
beta[1,] <- b[1,]
if (XG$reorder) {
beta[XG$nz+1,] <- b[-1,]
beta[-1,] <- beta[1+XG$ord.inv,]
} else {
beta[XG$nz+1,] <- b[-1,]
}
# Unstandardize
if (XG$reorder) b[-1,] <- b[1+XG$ord.inv,]
beta <- unstandardize(b, XG)

## Names
# Names
varnames <- c("(Intercept)", XG$names)
if (m > 1) {
beta[2:m,] <- sweep(beta[2:m,,drop=FALSE], 2, beta[1,], FUN="+")
Expand Down
20 changes: 7 additions & 13 deletions R/grpreg.R
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ grpreg <- function(X, y, group=1:ncol(X), penalty=c("grLasso", "grMCP", "grSCAD"
user.lambda <- TRUE
}

## Fit
# Fit
n <- length(yy)
p <- ncol(XG$X)
K <- as.numeric(table(XG$g))
Expand All @@ -57,7 +57,7 @@ grpreg <- function(X, y, group=1:ncol(X), penalty=c("grLasso", "grMCP", "grSCAD"
else fit <- .Call("gdfit_gaussian", XG$X, yy, penalty, K1, K0, lambda, lam.max, alpha, eps, as.integer(max.iter), gamma, XG$m, as.integer(dfmax), as.integer(gmax), as.integer(user.lambda))
b <- rbind(mean(y), matrix(fit[[1]], nrow=p))
iter <- fit[[2]]
df <- fit[[3]] + 1 ## Intercept
df <- fit[[3]] + 1 # Intercept
loss <- fit[[4]]
} else {
if (family=="binomial") {
Expand All @@ -73,7 +73,7 @@ grpreg <- function(X, y, group=1:ncol(X), penalty=c("grLasso", "grMCP", "grSCAD"
loss <- fit[[5]]
}

## Eliminate saturated lambda values, if any
# Eliminate saturated lambda values, if any
ind <- !is.na(iter)
b <- b[, ind, drop=FALSE]
iter <- iter[ind]
Expand All @@ -82,18 +82,12 @@ grpreg <- function(X, y, group=1:ncol(X), penalty=c("grLasso", "grMCP", "grSCAD"
loss <- loss[ind]
if (warn & any(iter==max.iter)) warning("Algorithm failed to converge for all values of lambda")

## Unstandardize
# Unstandardize
if (strtrim(penalty,2)=="gr") b <- unorthogonalize(b, XG$X, XG$g)
b <- unstandardize(b, XG$center, XG$scale)
beta <- matrix(0, nrow=m*(ncol(X)+1), ncol=length(lambda))
beta[1,] <- b[1,]
if (XG$reorder) {
beta[XG$nz+1,] <- b[1+XG$ord.inv,]
} else {
beta[XG$nz+1,] <- b[-1,]
}
if (XG$reorder) b[-1,] <- b[1+XG$ord.inv,]
beta <- unstandardize(b, XG)

## Names
# Names
varnames <- c("(Intercept)", XG$names)
if (m > 1) {
beta[2:m,] <- sweep(beta[2:m,,drop=FALSE], 2, beta[1,], FUN="+")
Expand Down
2 changes: 1 addition & 1 deletion R/multi.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ multiX <- function(X, m) {
}
cbind(matrix(as.numeric(diag(m)),m*n,m,byrow=TRUE)[,2:m],A)
}
multiG <- function(g, m) {
multiG <- function(g, ncolY) {
structure(c(rep(0, ncolY-1), rep(g, each=ncolY)),
levels=attr(g, 'levels'),
m=attr(g, 'm'))
Expand Down
6 changes: 1 addition & 5 deletions R/newXG.R
Original file line number Diff line number Diff line change
Expand Up @@ -46,12 +46,8 @@ newXG <- function(X, g, m, ncolY, bilevel) {
if (all(is.na(m))) {
m <- if (bilevel) rep(1, max(g)) else sqrt(table(g[g!=0]))
}
print(head(XX))
print(g)
print(G)
print(m)

# Return
return(list(X=XX, g=g, m=m, reorder=attr(G, 'reorder'), ord.inv=attr(G, 'ord.inv'), names=xnames,
center=center[nz], scale=scale[nz], nz=nz))
center=center, scale=scale, nz=nz))
}
8 changes: 4 additions & 4 deletions R/standardize.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
unstandardize <- function(b, center, scale) {
beta <- matrix(0, nrow=nrow(b), ncol=ncol(b))
beta[-1,] <- b[-1,] / scale
beta[1,] <- b[1,] - crossprod(center, beta[-1,,drop=FALSE])
unstandardize <- function(b, XG) {
beta <- matrix(0, nrow=1+length(XG$scale), ncol=ncol(b))
beta[1 + XG$nz,] <- b[-1,] / XG$scale[XG$nz]
beta[1,] <- b[1,] - crossprod(XG$center, beta[-1,,drop=FALSE])
beta
}
6 changes: 3 additions & 3 deletions inst/tests/standardization-orthogonalization.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ XX <- std[[1]]
center <- std[[2]]
scale <- std[[3]]
bb <- matrix(rnorm(l*(p+1)), nrow=p+1)
b <- grpreg:::unstandardize(bb, center, scale)
b <- grpreg:::unstandardize(bb, list(center=center, scale=scale, nz=which(scale>1e-10)))
check(cbind(1,XX) %*% bb, cbind(1,X) %*% b)

.test = "orthogonalize() orthogonalizes correctly"
Expand Down Expand Up @@ -48,7 +48,7 @@ X[,5] <- X[,4]
XX <- grpreg:::orthogonalize(X, group)
for (j in 1:group[p]) {
ind <- which(attr(XX, "group")==j)
check(crossprod(XX[,ind])/n, diag(length(ind)))
print(check(crossprod(XX[,ind])/n, diag(length(ind))))
}
y <- rnorm(nrow(X))
fit <- grpreg(X, y, group=LETTERS[group])
Expand All @@ -57,7 +57,7 @@ fit <- grpreg(X, y, group=LETTERS[group])
X <- matrix(rnorm(n*p),ncol=p)
X[,4] <- 0
X[,5] <- X[,3]
grp <- newXG(X, group, rep(1, max(group)), 1, FALSE)
grp <- grpreg:::newXG(X, group, rep(1, max(group)), 1, FALSE)
XX <- grp$X
g <- grp$g
for (j in 1:max(g)) {
Expand Down
51 changes: 8 additions & 43 deletions inst/tests/torture.R
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ X[,7] <- X[,6]
y <- rnorm(n)
b0 <- coef(lm(y~X)); b0[7:8] <- b0[7]/2
b <- coef(fit <- grpreg(X, y, group, penalty="grLasso", lambda.min=0, eps=1e-7), 0)
check(b, b0, tol=0.0001) %>% print
print(check(b, b0, tol=0.0001))
cvfit <- cv.grpreg(X, y, group, penalty="grLasso")
cvfit <- cv.grpreg(X, y, group, penalty="gel")

Expand Down Expand Up @@ -94,46 +94,11 @@ X[,15] <- 0 # Group 4 contains a zero column
y <- rnorm(n)
fit1 <- grpreg(X, y, group, penalty="grLasso", lambda.min=0)
fit2 <- grpreg(X[,perm], y, group[perm], penalty="grLasso", lambda.min=0)
b1 <- coef(fit1, 0)[-1][perm]
b2 <- coef(fit2, 0)[-1]
check(b1, b2, tol=0.01)



b1 <- coef(fit1, 0)[-1][perm]
b1 <- coef(fit1, 0)[-1]
b2 <- coef(fit2, 0)[-1]
check(b1, b2, tol=0.01)
cvfit <- cv.grpreg(X[,perm], y, group[perm], penalty="grLasso")





.test = "grpreg out-of-order groups with constant columns"
n <- 50
group <- rep(c(1,3,0,2),5:2)
p <- length(group)
X <- matrix(rnorm(n*p),ncol=p)
X[,group==2] <- 0
y <- rnorm(n)
mle <- coef(lm(y~X))
mle[!is.finite(mle)] <- 0
grl <- coef(grpreg(X, y, group, penalty="grLasso", lambda.min=0, eps=1e-7), lambda=0)
check(mle, grl, tol=0.01)
cvfit <- cv.grpreg(X, y, group, penalty="grLasso")




y <- rnorm(nrow(X))
perm <- sample(1:ncol(X))
Xp <- X[,perm]
gp <- LETTERS[group][perm]
fit <- grpreg(Xp, y, group=gp, lambda.min=0)
b1 <- coef(fit, 0)
b2 <- coef(lm(y ~ Xp))
b
check(b1[5], 0)
b1[4] <- b1[4] + b1[6]
check(b1[1:4], b2[1:4])

check(b1[perm], b2, tol=0.01) # Checking perm ordering
nz <- which(apply(X, 2, sd)!=0)
fit3 <- grpreg(X[,nz], y, group[nz], penalty="grLasso", lambda.min=0)
b3 <- coef(fit3, 0)[-1]
check(b1[nz], b3, tol=0.01) # Checking dropped group/var
check(coef(fit1)["V6",], coef(fit1)["V7",], tol=0.01) # Checking rank handled correctly

0 comments on commit 776f1c1

Please sign in to comment.