EBRR.G

/* This is a GAUSS procedure for empirical-Bayes or semi-Bayes estimation
   of correlated person-time rates, or of regression coefficients from
   multiplicative relative-risk models (e.g., logistic, Cox, or
   exponential-Poisson regression). It is based on generalizations of
   Morris's 1983 (JASA) formulas (see Greenland, Stat Med 1993).
   Inputs: b = estimated log rates or coefficients (first-stage
               parameter estimates),
           v = estimated covariance matrix for b
               (may be vector of variances if the b are independent),
           z = prior design matrix (set to 0 for shrinkage to 0 vector),
           t2pass = prior variance for semi-Bayes estimation (may be vector);
                    set t2pass = 0 if prior variance is to be
                    estimated (empirical-Bayes estimation),
           bnames = vector of names associated with the components of b & bp;
                    set to 0 for no printed output,
           yname = name of first-stage regressand (outcome, disease);
                   may be 0 if there is no printed output,
           bnames2 = vector of names associated with the components of bs;
                     set to 0 if you do not want the procedure to print
                     estimates for the second-stage parameters.
    Outputs: bs = second-stage coefficient (hyperparameter) estimates,
             vs = estimated covariance matrix for bs,
             bp = estimated posterior log rates or coefficents,
             vp = estimated posterior covariance for coefficients
             t2 = t2pass for semi-Bayes, estimated prior variance for
                  empirical Bayes;
             dfhm = approximate df for hierarchical model, bounded by
                   cols(z) as tau2 goes to 0, rows(z) as tau2 goes up.
Globals (may be set by user from calling program; otherwise defaults are used):
      _bprior = prior means to shrink toward when z = 0 (default is 0)
      _priormu = set to one to show estimated prior means for bp (default is 0)
      _cc = 1 to use the curvature correction to calculate point estimates
            when prior variance is estimated -- default is 1 and recommended,
            but correction may be turned off (set to _cc zero) to give results
            in better agreement with PQL procedures
      _clevel = confidence level for confidence intervals (default .95)
      _bcriter = convergence criterion for b (default is .0005)
      _maxit = maximum number of iterations (default is 50)
*/
proc (6) = ebrr(b,v,z,t2pass,bnames,yname,bnames2);
declare matrix _priormu = 0; declare matrix _bprior = 0;
declare matrix _cc = 1;
declare matrix _maxit = 50; declare matrix _criter = .0005;
local np,tozero,eb,np2,vecv,df2,eyenp,e,t2,undisp,cnv,iter,w,ws,wv,wz,
      bs,vs,rsst,rms,t2old,bp,cc,hatw,inf,wvce,hatp,vp,it2,dfhm,dft;
bnames = 0$+bnames; bnames2 = 0$+bnames2;
/* no. 1st-stage parameters: */ np = rows(b);
/* if z = 0 shrink to zero: */ tozero = sumall(abs(z)) eq 0;
if not tozero and rows(z) ne rows(b);
   "EBRR.G: rows b and rows z are ";; rows(b);; rows(z); end; endif;
eb = sumall(t2pass) eq 0;
np2 = (1-tozero)*cols(z);
if np2 ge np-2*eb; "EBRR.G: Too many 2nd-stage parameters."; end; endif;
/* indicate vector v (indep. 1st-stage estimates): */ vecv = cols(v) eq 1;
/* 2nd-stage df: */ df2 = np - np2; eyenp = eye(np);
/* Begin estimation procedure: */
/* Subtract prior means: */ b = b - _bprior;
/* Initialize loop variables for estimation of prior variance: */
   if eb; t2 = 0; else; t2 = t2pass; endif;
if bnames[1]; print; "PROC EBRR.G:";;
   if eb; "Empirical-Bayes estimation for correlated outcomes";
          " - Morris estimate of prior variance.";
      else; "Semi-Bayes estimation for correlated outcomes.";
   endif;
   if rows(bnames) ne rows(b);
     "INPUT ERROR: No. 1st-stage names not equal to no. of coefficients."; end;
   endif;
   if bnames2[1] and rows(bnames2) ne cols(z);
     "INPUT ERROR: No. 2nd-stage names not equal to no. of regressors."; end;
   endif;
   ""$+ftocv(np,1,0)$+" coefficients shrunk toward ";; format /rds 7,3;
   if tozero; "prior means of    ";; _bprior';
      else;"estimated deviations of prior means from ";; _bprior';
           ""$+ftocv(np2,1,0)$+" estimated second-stage parameters.";
   endif;
   if eb; "Estimated";; else; "Pre-specified";; endif;
   " prior standard deviations:";; sqrt(t2');
endif;
trap 1; if vecv; inf = eyenp./v; else; inf = invpd(v); endif;
        if scalerr(inf); "V SINGULAR IN EBRR.G"; retp(0,0,0,0,0,0); endif;
trap 0;
/* Estimate second-stage coefficients (and prior variance if EB): */
   /* underdispersion & convergence indicators, and counter: */
      undisp = 0; cnv = 0; iter = 0;
   do until cnv or (iter ge _maxit); iter = iter + 1;
      /* Do prior regression: */
      /* weight matrix: */
      if vecv; w = 1/(v + t2); wv = w.*v; wz = w.*z;
         else; w = invpd(v + t2.*eyenp); wv = w*v; wz = w*z;
      endif;
      ws = sumall(w);
      /* invert 2nd-stage information: */
         if tozero; vs = 0; else; vs = invpd(z'wz); endif;
      /* 2nd-stage coefficient estimates: */ bs = vs*(wz'b);
      /* residual from 2nd-stage estimates: */ e = b - z*bs;
      /* total RSS: */
      if vecv; rsst = e'(w.*e); else; rsst = e'w*e; endif;
      /* residual mean square: */ rms = np*rsst/(df2*ws);
      /* Exit loop if prior variance is fixed in advance (semi-Bayes)
         or if there is underdispersion: */
         if not eb; break; endif;
         if undisp gt 1; " UNDERDISPERSION IN EBRR.G ";; break; endif;
       /* update t2: */ t2old = t2; t2 = max(rms - sumall(wv)/ws,0);
       /* count occurrences of underdispersion (t2 le 0): */
          undisp = undisp + (t2 le 0);
       if bnames[1]; /* give estimated prior variance: */
          format 4,0; "                   at iteration ";; iter;;":";;
          format 8,4; t2;
       endif;
       /* Test for convergence: */
          if undisp lt 1 and abs(t2-t2old) lt _criter; cnv = 1; endif;
   endo;
if iter ge _maxit;
    "WARNING: No convergence of EB after ";;
    format /rdn 4,0; iter;;" iterations;";; format 8,4;
endif;
/* weight for the prior expectations of the first-stage parameters: */
   if eb; /* use curvature correction for EB: */
       cc = max((df2 - 2)/df2,0); else; cc = 1; endif;
/* projection of b to prior mean z*bs: */ hatw = z*vs*wz';
/* hatp = projection of b to posterior mean bp;
   vp = estimated posterior covariance: */
   if vecv; vp = (eyenp - cc*wv.*(eyenp - hatw)).*v';
      else; vp = v - cc*wv'(eyenp - hatw)*v;
   endif;
if eb; /* correct 1st-stage variance for estimation of the prior variance: */
      if vecv; wvce = wv.*(e.*sqrt((sumc(wv)/ws + t2)./(v + t2)));
         else; wvce = wv*(e.*sqrt((sumall(wv)/ws + t2)./(diag(v) + t2)));
      endif;
      vp = vp + (2*cc/df2)*wvce*wvce';
endif;
   it2 = ((t2 .ne 0)./(t2 + (t2 .eq 0))).*eyenp;
   /* set cc to 1 if _cc = 0: */ cc = 1 - (1-cc)*_cc;
   hatp = invpd(inf + it2*eyenp)*(inf + (1-cc)*it2 + cc*it2*hatw);
/* EB-posterior expectations of 1st-stage parameters,
   adding back prior means: */ bp = hatp*b + _bprior;
/* Approx. df in hierarchical model: */ dfhm = tracemat(hatp);
if bnames[1]; /* print results: */ format /rds 7,3; print;
   "With standard errors from estimated posterior covariance matrix:";
   if sumc(diag(vp) .lt 0); "NEGATIVE VARIANCE ESTIMATE FOR";;
      local nv; nv = selif(seqa(1,1,np),diag(vp) .lt 0); $bnames[nv]';
   endif;
   rreport(bp,sqrt(max(diag(vp),0)),bnames,yname,-1,-1,0,1);
   df2 = df2*(-1)^(t2pass eq 0);
   if _priormu;
     "Estimated prior means for the above coefficients,";
     " and standard errors for these estimated means:";
     rreport(z*bs,sqrt(diag(z*vs*z')),bnames,yname,-1,-1,df2,0);
   endif;
   if bnames2[1];
      "Estimated 2nd-stage coefficients:";
      call rreport(bs,sqrt(diag(vs)),bnames2,"beta",-1,-1,df2,1);
   endif;
   /* total residual df */ dft = np - tracemat(hatw);
   "Total rss and df: ";; format 10,3; rsst;; dft;;
   if eb; " (should be approx. equal)";
      else; ", chi-squared p = :";; cdfchic(rsst,dft);
   endif;
endif;
retp(bs,vs,bp,vp,t2,dfhm);
endp;