Changed convergence criterion

pbreheny · May 18, 2017 · de1fa6c · de1fa6c
1 parent 08d233a
commit de1fa6c
Show file tree

Hide file tree

Showing 11 changed files with 144 additions and 191 deletions.
diff --git a/NEWS b/NEWS
@@ -1,10 +1,8 @@
-Pending:
-  * Change: Convergence criterion now based on RMSD of linear predictors
-
 3.1-0 ()
   * New: Additional tests and support for coersion of various types with
     respect to both X and y
   * Internal: new SSR-BEDPP feature screening rule for group lasso
+  * Change: Convergence criterion now based on RMSD of linear predictors
   * Change: 'Lung' and 'Birthwt' data sets now use factor representation of
     group, as character vectors are inherently ambiguous with respect to order
   * Change: max.iter now based on total number of iterations for entire path

diff --git a/inst/tests/basic-functionality.R b/inst/tests/basic-functionality.R
@@ -1,5 +1,4 @@
 #source("~/dev/.grpreg.setup.R")
-set.seed(1)
 
 .test = "grpreg() reproduces simple linear regression"
 n <- 5
@@ -12,22 +11,22 @@ nlam=100
 par(mfcol=c(3,2))
 gel <- coef(fit <- grpreg(X, y, group, penalty="gel", nlambda=nlam, lambda.min=0))[,nlam]
 plot(fit, main=fit$penalty)
-check(gel, reg, tolerance=.01)
+check(gel, reg)
 cMCP <- coef(fit <- grpreg(X, y, group, penalty="cMCP", nlambda=nlam, lambda.min=0))[,nlam]
 plot(fit, main=fit$penalty)
-check(cMCP, reg, tolerance=.01)
+check(cMCP, reg)
 bridge <- coef(fit <- gBridge(X, y, group, nlambda=nlam, lambda.min=0))[,1]
 plot(fit, main=fit$penalty)
-check(bridge, reg, tolerance=.01)
+check(bridge, reg)
 grLasso <- coef(fit <- grpreg(X, y, group, penalty="grLasso", nlambda=nlam, lambda.min=0))[,nlam]
 plot(fit, main=fit$penalty)
-check(grLasso, reg, tolerance=.01)
+check(grLasso, reg)
 grMCP <- coef(fit <- grpreg(X, y, group, penalty="grMCP", nlambda=nlam, lambda.min=0))[,nlam]
 plot(fit, main=fit$penalty)
-check(grMCP, reg, tolerance=.01)
+check(grMCP, reg)
 grSCAD <- coef(fit <- grpreg(X, y, group, penalty="grSCAD", nlambda=nlam, lambda.min=0))[,nlam]
 plot(fit, main=fit$penalty)
-check(grSCAD, reg, tolerance=.01)
+check(grSCAD, reg)
 
 .test = "grpreg reproduces linear regression"
 n <- 50
@@ -39,25 +38,25 @@ fit.mle <- lm(y~X)
 reg <- coef(fit.mle)
 nlam=100
 par(mfcol=c(3,2))
-gel <- coef(fit <- grpreg(X, y, group, penalty="gel", nlambda=nlam, lambda.min=0))[,nlam]
+gel <- coef(fit <- grpreg(X, y, group, penalty="gel", nlambda=nlam, lambda.min=0, eps=1e-10))[,nlam]
 plot(fit, main=fit$penalty)
-check(gel, reg, tolerance=.01)
-cMCP <- coef(fit <- grpreg(X, y, group, penalty="cMCP", lambda.min=0))[,100]
+check(gel, reg)
+cMCP <- coef(fit <- grpreg(X, y, group, penalty="cMCP", lambda.min=0, eps=1e-10))[,100]
 plot(fit, main=fit$penalty)
-check(cMCP, reg, tolerance=.01)
-bridge <- coef(fit <- gBridge(X, y, group, lambda.min=0))[,1]
+check(cMCP, reg)
+bridge <- coef(fit <- gBridge(X, y, group, lambda.min=0, eps=1e-10))[,1]
 plot(fit, main=fit$penalty)
-check(bridge, reg, tolerance=.01)
-grLasso <- coef(fit <- grpreg(X, y, group, penalty="grLasso", lambda.min=0))[,100]
+check(bridge, reg)
+grLasso <- coef(fit <- grpreg(X, y, group, penalty="grLasso", lambda.min=0, eps=1e-10))[,100]
 plot(fit, main=fit$penalty)
-check(grLasso, reg, tolerance=.01)
-grMCP <- coef(fit <- grpreg(X, y, group, penalty="grMCP", lambda.min=0))[,100]
+check(grLasso, reg)
+grMCP <- coef(fit <- grpreg(X, y, group, penalty="grMCP", lambda.min=0, eps=1e-10))[,100]
 plot(fit, main=fit$penalty)
-check(grMCP, reg, tolerance=.01)
-grSCAD <- coef(fit <- grpreg(X, y, group, penalty="grSCAD", lambda.min=0))[,100]
+check(grMCP, reg)
+grSCAD <- coef(fit <- grpreg(X, y, group, penalty="grSCAD", lambda.min=0, eps=1e-10))[,100]
 plot(fit, main=fit$penalty)
-check(grSCAD, reg, tolerance=.01)
-check(predict(fit, X)[,100], predict(fit.mle), tolerance=.001)
+check(grSCAD, reg)
+check(predict(fit, X)[,100], predict(fit.mle))
 
 .test = "grpreg reproduces simple logistic regression"
 n <- 20
@@ -68,24 +67,24 @@ group <- 1
 reg <- glm(y~X, family="binomial")$coef
 nlam <- 100
 par(mfcol=c(3,2))
-gel <- coef(fit <- grpreg(X, y, group, penalty="gel", nlambda=nlam, lambda.min=0, family="binomial"))[,nlam]
+gel <- coef(fit <- grpreg(X, y, group, penalty="gel", nlambda=nlam, lambda.min=0, family="binomial", eps=1e-10))[,nlam]
 plot(fit, main=fit$penalty)
-check(gel, reg, tolerance=.01)
-cMCP <- coef(fit <- grpreg(X, y, group, penalty="cMCP", nlambda=nlam, lambda.min=0, family="binomial", gamma=12))[,nlam]
+check(gel, reg)
+cMCP <- coef(fit <- grpreg(X, y, group, penalty="cMCP", nlambda=nlam, lambda.min=0, family="binomial", gamma=9, eps=1e-10))[,nlam]
 plot(fit, main=fit$penalty)
-check(cMCP, reg, tolerance=.01)
-bridge <- coef(fit <- gBridge(X, y, group, lambda.min=0, nlambda=nlam, family="binomial"))[,1]
+check(cMCP, reg)
+bridge <- coef(fit <- gBridge(X, y, group, lambda.min=0, nlambda=nlam, family="binomial", eps=1e-10))[,1]
 plot(fit, main=fit$penalty)
-check(bridge, reg, tolerance=.01)
-grLasso <- coef(fit <- grpreg(X, y, group, penalty="grLasso", nlambda=nlam, lambda.min=0, family="binomial"))[,nlam]
+check(bridge, reg)
+grLasso <- coef(fit <- grpreg(X, y, group, penalty="grLasso", nlambda=nlam, lambda.min=0, family="binomial", eps=1e-10))[,nlam]
 plot(fit, main=fit$penalty)
-check(grLasso, reg, tolerance=.01)
-grMCP <- coef(fit <- grpreg(X, y, group, penalty="grMCP", nlambda=nlam, lambda.min=0, family="binomial"))[,nlam]
+check(grLasso, reg)
+grMCP <- coef(fit <- grpreg(X, y, group, penalty="grMCP", nlambda=nlam, lambda.min=0, family="binomial", eps=1e-10))[,nlam]
 plot(fit, main=fit$penalty)
-check(grMCP, reg, tolerance=.01)
-grSCAD <- coef(fit <- grpreg(X, y, group, penalty="grSCAD", nlambda=nlam, lambda.min=0, family="binomial"))[,nlam]
+check(grMCP, reg)
+grSCAD <- coef(fit <- grpreg(X, y, group, penalty="grSCAD", nlambda=nlam, lambda.min=0, family="binomial", eps=1e-10))[,nlam]
 plot(fit, main=fit$penalty)
-check(grSCAD, reg, tolerance=.01)
+check(grSCAD, reg)
 
 .test = "grpreg() reproduces logistic regression"
 n <- 50
@@ -96,26 +95,26 @@ y <- runif(n) > .5
 fit.mle <- glm(y~X, family="binomial")
 reg <- coef(fit.mle)
 par(mfcol=c(3,2))
-gel <- coef(fit <- grpreg(X, y, group, penalty="gel", family="binomial"))[,100]
+gel <- coef(fit <- grpreg(X, y, group, penalty="gel", family="binomial", eps=1e-10, lambda.min=0))[,100]
 plot(fit, main=fit$penalty)
-check(gel, reg, tolerance=.01)
-cMCP <- coef(fit <- grpreg(X, y, group, penalty="cMCP", family="binomial"))[,100]
+check(gel, reg)
+cMCP <- coef(fit <- grpreg(X, y, group, penalty="cMCP", family="binomial", eps=1e-10, lambda.min=0))[,100]
 plot(fit, main=fit$penalty)
-check(cMCP, reg, tolerance=.01)
-bridge <- coef(fit <- gBridge(X, y, group, family="binomial"))[,1]
+check(cMCP, reg)
+bridge <- coef(fit <- gBridge(X, y, group, family="binomial", eps=1e-10, lambda.min=0))[,1]
 plot(fit, main=fit$penalty)
-check(bridge, reg, tolerance=.01)
-grLasso <- coef(fit <- grpreg(X, y, group, penalty="grLasso", family="binomial"))[,100]
+check(bridge, reg)
+grLasso <- coef(fit <- grpreg(X, y, group, penalty="grLasso", family="binomial", eps=1e-10, lambda.min=0))[,100]
 plot(fit, main=fit$penalty)
-check(grLasso, reg, tolerance=.01)
-grMCP <- coef(fit <- grpreg(X, y, group, penalty="grMCP", family="binomial", gamma=2))[,100]
+check(grLasso, reg)
+grMCP <- coef(fit <- grpreg(X, y, group, penalty="grMCP", family="binomial", gamma=2, eps=1e-10, lambda.min=0))[,100]
 plot(fit, main=fit$penalty)
-check(grMCP, reg, tolerance=.01)
-grSCAD <- coef(fit <- grpreg(X, y, group, penalty="grSCAD", family="binomial", gamma=2.1))[,100]
+check(grMCP, reg)
+grSCAD <- coef(fit <- grpreg(X, y, group, penalty="grSCAD", family="binomial", gamma=2.1, eps=1e-10, lambda.min=0))[,100]
 plot(fit, main=fit$penalty)
-check(grSCAD, reg, tolerance=.01)
-check(predict(fit, X)[,100], predict(fit.mle), tolerance=.001)
-check(predict(fit, X, type="response")[,100], predict(fit.mle, type="response"), tolerance=.001)
+check(grSCAD, reg)
+check(predict(fit, X)[,100], predict(fit.mle))
+check(predict(fit, X, type="response")[,100], predict(fit.mle, type="response"))
 
 .test = "grpreg() reproduces poisson regression"
 n <- 50
@@ -126,52 +125,23 @@ y <- sample(1:5, n, replace=TRUE)
 fit.mle <- glm(y~X, family="poisson")
 reg <- coef(fit.mle)
 par(mfcol=c(3,2))
-gel <- coef(fit <- grpreg(X, y, group, penalty="gel", family="poisson"))[,100]
+gel <- coef(fit <- grpreg(X, y, group, penalty="gel", family="poisson", eps=1e-10, lambda.min=0))[,100]
 plot(fit, main=fit$penalty)
-check(gel, reg, tolerance=.01)
-cMCP <- coef(fit <- grpreg(X, y, group, penalty="cMCP", family="poisson"))[,100]
+check(gel, reg)
+cMCP <- coef(fit <- grpreg(X, y, group, penalty="cMCP", family="poisson", eps=1e-10, lambda.min=0))[,100]
 plot(fit, main=fit$penalty)
-check(cMCP, reg, tolerance=.01)
-bridge <- coef(fit <- gBridge(X, y, group, family="poisson"))[,1]
+check(cMCP, reg)
+bridge <- coef(fit <- gBridge(X, y, group, family="poisson", eps=1e-10, lambda.min=0))[,1]
 plot(fit, main=fit$penalty)
-check(bridge, reg, tolerance=.01)
-grLasso <- coef(fit <- grpreg(X, y, group, penalty="grLasso", family="poisson"))[,100]
+check(bridge, reg)
+grLasso <- coef(fit <- grpreg(X, y, group, penalty="grLasso", family="poisson", eps=1e-10, lambda.min=0))[,100]
 plot(fit, main=fit$penalty)
-check(grLasso, reg, tolerance=.01)
-grMCP <- coef(fit <- grpreg(X, y, group, penalty="grMCP", family="poisson", gamma=2))[,100]
+check(grLasso, reg)
+grMCP <- coef(fit <- grpreg(X, y, group, penalty="grMCP", family="poisson", gamma=2, eps=1e-10, lambda.min=0))[,100]
 plot(fit, main=fit$penalty)
-check(grMCP, reg, tolerance=.01)
-grSCAD <- coef(fit <- grpreg(X, y, group, penalty="grSCAD", family="poisson", gamma=2.1))[,100]
+check(grMCP, reg)
+grSCAD <- coef(fit <- grpreg(X, y, group, penalty="grSCAD", family="poisson", gamma=2.1, eps=1e-10, lambda.min=0))[,100]
 plot(fit, main=fit$penalty)
-check(grSCAD, reg, tolerance=.01)
-check(predict(fit, X)[,100], predict(fit.mle), tolerance=.001)
-check(predict(fit, X, type="response")[,100], predict(fit.mle, type="response"), tolerance=.001)
-
-
-
-
-.test = "grpreg exactly reproduces linear regression"
-n <- 50
-p <- 10
-X <- matrix(rnorm(n*p),ncol=p)
-y <- rnorm(n)
-group <- rep(0:3,4:1)
-group <- 1:10
-reg <- coef(fit.mle <- lm(y~X))
-gel <- coef(fit <- grpreg(X, y, group, penalty="gel", lambda.min=0, eps=1e-10), lambda=0)
-grl <- coef(fit <- grpreg(X, y, group, penalty="grLasso", lambda.min=0, eps=1e-10), lambda=0)
-check(reg, grl)
-check(reg, gel)
-
-.test = "grpreg exactly reproduces linear regression"
-n <- 50
-p <- 10
-X <- ncvreg::std(matrix(rnorm(n*p),ncol=p))
-y <- rnorm(n)
-group <- rep(0:3,4:1)
-group <- 1:10
-reg <- coef(fit.mle <- lm(y~X))
-gel <- coef(fit <- grpreg(X, y, group, penalty="gel", lambda.min=0, eps=1e-10), lambda=0)
-grl <- coef(fit <- grpreg(X, y, group, penalty="grLasso", lambda.min=0, eps=1e-10), lambda=0)
-check(reg, grl)
-check(reg, gel)
+check(grSCAD, reg)
+check(predict(fit, X)[,100], predict(fit.mle))
+check(predict(fit, X, type="response")[,100], predict(fit.mle, type="response"))
diff --git a/src/gdfit_binomial.c b/src/gdfit_binomial.c
@@ -4,7 +4,6 @@
 #include <R_ext/Rdynload.h>
 #include <R.h>
 #include <R_ext/Applic.h>
-int checkConvergence(double *beta, double *beta_old, double eps, int l, int J);
 double crossprod(double *x, double *y, int n, int j);
 double sum(double *x, int n);
 double norm(double *x, int p);
@@ -15,7 +14,7 @@ double MCP(double theta, double l, double a);
 double dMCP(double theta, double l, double a);
 
 // Group descent update -- binomial
-double gd_binomial(double *b, double *x, double *r, double *eta, int g, int *K1, int n, int l, int p, const char *penalty, double lam1, double lam2, double gamma, SEXP df, double *a, double maxChange) {
+void gd_binomial(double *b, double *x, double *r, double *eta, int g, int *K1, int n, int l, int p, const char *penalty, double lam1, double lam2, double gamma, SEXP df, double *a, double *maxChange) {
 
   // Calculate z
   int K = K1[g+1] - K1[g];
@@ -34,7 +33,7 @@ double gd_binomial(double *b, double *x, double *r, double *eta, int g, int *K1,
     for (int j=K1[g]; j<K1[g+1]; j++) {
       b[l*p+j] = len * z[j-K1[g]] / z_norm;
       double shift = b[l*p+j]-a[j];
-      if (fabs(shift) > maxChange) maxChange = fabs(shift);
+      if (fabs(shift) > maxChange[0]) maxChange[0] = fabs(shift);
       for (int i=0; i<n; i++) {
 	double si = shift*x[j*n+i];
 	r[i] -= si;
@@ -46,7 +45,6 @@ double gd_binomial(double *b, double *x, double *r, double *eta, int g, int *K1,
   // Update df
   if (len > 0) REAL(df)[l] += K * len / z_norm;
   Free(z);
-  return(maxChange);
 }
 
 SEXP gdfit_binomial(SEXP X_, SEXP y_, SEXP penalty_, SEXP K1_, SEXP K0_, SEXP lambda, SEXP alpha_, SEXP eps_, SEXP max_iter_, SEXP gamma_, SEXP group_multiplier, SEXP dfmax_, SEXP gmax_, SEXP warn_, SEXP user_) {
@@ -101,7 +99,7 @@ SEXP gdfit_binomial(SEXP X_, SEXP y_, SEXP penalty_, SEXP K1_, SEXP K0_, SEXP la
   int *e = Calloc(J, int);
   for (int g=0; g<J; g++) e[g] = 0;
   int converged, lstart, ng, nv, violations;
-  double shift, l1, l2;
+  double shift, l1, l2, maxChange;
 
   // Initialization
   double ybar = sum(y, n)/n;
@@ -146,7 +144,6 @@ SEXP gdfit_binomial(SEXP X_, SEXP y_, SEXP penalty_, SEXP K1_, SEXP K0_, SEXP la
 	converged = 0;
 	INTEGER(iter)[l]++;
         tot_iter++;
-        double maxChange = 0;
 
 	// Approximate L
 	REAL(Dev)[l] = 0;
@@ -178,10 +175,12 @@ SEXP gdfit_binomial(SEXP X_, SEXP y_, SEXP penalty_, SEXP K1_, SEXP K0_, SEXP la
 	  eta[i] += shift;
 	}
 	REAL(df)[l] = 1;
+        maxChange = fabs(shift);
 
 	// Update unpenalized covariates
 	for (int j=0; j<K0; j++) {
 	  shift = crossprod(X, r, n, j)/n;
+          if (fabs(shift) > maxChange) maxChange = fabs(shift);
 	  b[l*p+j] = shift + a[j];
 	  for (int i=0; i<n; i++) {
 	    double si = shift * X[n*j+i];
@@ -195,7 +194,7 @@ SEXP gdfit_binomial(SEXP X_, SEXP y_, SEXP penalty_, SEXP K1_, SEXP K0_, SEXP la
 	for (int g=0; g<J; g++) {
 	  l1 = lam[l] * m[g] * alpha;
 	  l2 = lam[l] * m[g] * (1-alpha);
-	  if (e[g]) maxChange = gd_binomial(b, X, r, eta, g, K1, n, l, p, penalty, l1, l2, gamma, df, a, maxChange);
+	  if (e[g]) gd_binomial(b, X, r, eta, g, K1, n, l, p, penalty, l1, l2, gamma, df, a, &maxChange);
 	}
 
 	// Check convergence
@@ -210,7 +209,7 @@ SEXP gdfit_binomial(SEXP X_, SEXP y_, SEXP penalty_, SEXP K1_, SEXP K0_, SEXP la
 	if (e[g]==0) {
 	  l1 = lam[l] * m[g] * alpha;
 	  l2 = lam[l] * m[g] * (1-alpha);
-	  gd_binomial(b, X, r, eta, g, K1, n, l, p, penalty, l1, l2, gamma, df, a, 0);
+	  gd_binomial(b, X, r, eta, g, K1, n, l, p, penalty, l1, l2, gamma, df, a, &maxChange);
 	  if (b[l*p+K1[g]] != 0) {
 	    e[g] = 1;
 	    violations++;

diff --git a/src/gdfit_cox.c b/src/gdfit_cox.c
@@ -4,7 +4,6 @@
 #include <R_ext/Rdynload.h>
 #include <R.h>
 #include <R_ext/Applic.h>
-int checkConvergence(double *beta, double *beta_old, double eps, int l, int J);
 double crossprod(double *x, double *y, int n, int j);
 double norm(double *x, int p);
 double S(double z, double l);
@@ -14,7 +13,7 @@ double Fs(double z, double l1, double l2, double gamma);
 // Group descent update -- cox
 void gd_cox(double *b, double *x, double *r, double *eta, double v, int g,
 	    int *K1, int n, int l, int p, const char *penalty, double lam1,
-	    double lam2, double gamma, SEXP df, double *a) {
+	    double lam2, double gamma, SEXP df, double *a, double *maxChange) {
 
   // Calculate z
   int K = K1[g+1] - K1[g];
@@ -32,6 +31,7 @@ void gd_cox(double *b, double *x, double *r, double *eta, double v, int g,
     for (int j=K1[g]; j<K1[g+1]; j++) {
       b[l*p+j] = len * z[j-K1[g]] / z_norm;
       double shift = b[l*p+j]-a[j];
+      if (fabs(shift) > maxChange[0]) maxChange[0] = fabs(shift);
       for (int i=0; i<n; i++) {
 	double si = shift*x[j*n+i];
 	r[i] -= si;
@@ -96,8 +96,8 @@ SEXP gdfit_cox(SEXP X_, SEXP d_, SEXP penalty_, SEXP K1_, SEXP K0_, SEXP lambda,
   double *eta = Calloc(n, double);
   int *e = Calloc(J, int);
   for (int g=0; g<J; g++) e[g] = 0;
-  int converged, lstart, ng, nv, violations;
-  double shift, l1, l2, nullDev, v, s;
+  int lstart, ng, nv, violations;
+  double shift, l1, l2, nullDev, v, s, maxChange;
 
   // Initialization
   // If lam[0]=lam_max, skip lam[0] -- closed form sol'n available
@@ -173,8 +173,10 @@ SEXP gdfit_cox(SEXP X_, SEXP d_, SEXP penalty_, SEXP K1_, SEXP K0_, SEXP lambda,
 	}
 
 	// Update unpenalized covariates
+        maxChange = 0;
 	for (int j=0; j<K0; j++) {
 	  shift = crossprod(X, r, n, j)/n;
+          if (fabs(shift) > maxChange) maxChange = fabs(shift);
 	  b[l*p+j] = shift + a[j];
 	  for (int i=0; i<n; i++) {
 	    double si = shift * X[n*j+i];
@@ -189,13 +191,12 @@ SEXP gdfit_cox(SEXP X_, SEXP d_, SEXP penalty_, SEXP K1_, SEXP K0_, SEXP lambda,
 	  l1 = lam[l] * m[g] * alpha;
 	  l2 = lam[l] * m[g] * (1-alpha);
 	  if (e[g]) gd_cox(b, X, r, eta, v, g, K1, n, l, p, penalty, l1, l2,
-			   gamma, df, a);
+			   gamma, df, a, &maxChange);
 	}
 
 	// Check convergence
-	converged = checkConvergence(b, a, eps, l, p);
-	for (int j=0; j<p; j++) a[j] = b[l*p+j];
-	if (converged) break;
+        for (int j=0; j<p; j++) a[j] = b[l*p+j];
+        if (maxChange < eps) break;
       }
 
       // Scan for violations
@@ -204,7 +205,7 @@ SEXP gdfit_cox(SEXP X_, SEXP d_, SEXP penalty_, SEXP K1_, SEXP K0_, SEXP lambda,
 	if (e[g]==0) {
 	  l1 = lam[l] * m[g] * alpha;
 	  l2 = lam[l] * m[g] * (1-alpha);
-	  gd_cox(b, X, r, eta, v, g, K1, n, l, p, penalty, l1, l2, gamma, df, a);
+	  gd_cox(b, X, r, eta, v, g, K1, n, l, p, penalty, l1, l2, gamma, df, a, &maxChange);
 	  if (b[l*p+K1[g]] != 0) {
 	    e[g] = 1;
 	    violations++;