Appendix.Rmd

---
output:
  pdf_document: default
  html_document: default
---


```{r, include=FALSE}

# Load the five yearly BRFSS extracts, keeping only columns 2 through 6 of
# each CSV file. The temporary reader is removed afterwards so it does not
# linger in the knitting environment.
.read_brfss_year <- function(path) {
  data.frame(read.csv(path)[, 2:6])
}
t15.data <- .read_brfss_year("2015 BRFSS Data.csv")
t14.data <- .read_brfss_year("2014 BRFSS Data.csv")
t13.data <- .read_brfss_year("2013 BRFSS Data.csv")
t12.data <- .read_brfss_year("2012 BRFSS Data.csv")
t11.data <- .read_brfss_year("2011 BRFSS Data.csv")
rm(.read_brfss_year)


### Cleaning data ###

# Drop unusable survey rows.
#
# A row is removed when any of the first five columns is NA, or when any of
# the first three columns holds the code 7 or 9. Row names of the surviving
# rows are preserved.
#
# x: data frame with at least five columns.
# Returns the filtered data frame.
cleaner <- function(x){
  has_missing <- Reduce(`|`, lapply(1:5, function(k) is.na(x[, k])))
  kept <- x[!has_missing, ]

  has_excluded_code <- Reduce(
    `|`,
    lapply(1:3, function(k) kept[, k] %in% c(7, 9))
  )
  kept[!has_excluded_code, ]
}

# Run the cleaning step on every survey year, overwriting each data frame
# with its filtered version.
t11.data <- cleaner(t11.data)
t12.data <- cleaner(t12.data)
t13.data <- cleaner(t13.data)
t14.data <- cleaner(t14.data)
t15.data <- cleaner(t15.data)

# Since we are only interested in whether or not respondents have a personal doctor we will treat those who reported having multiple doctors and those who reported only having one doctor the same. Now individuals with personal doctors will be coded as '1' and those without as '3'.

# Collapse code 2 into code 1 in the second column of a data frame.
#
# x: data frame whose second column holds the codes to recode.
# Returns x with every 2 in column 2 replaced by 1; all other values,
# including NA, are left untouched.
persdocmod <- function(x){
  # which() drops NA comparisons. Indexing a data frame assignment with a
  # logical vector that contains NA is an error, so a raw `== 2` mask would
  # fail on any input that still has missing values in column 2.
  recode_rows <- which(x[, 2] == 2)
  x[recode_rows, 2] <- 1

  x
}

# Recode the personal-doctor column in every survey year.
t11.data <- persdocmod(t11.data)
t12.data <- persdocmod(t12.data)
t13.data <- persdocmod(t13.data)
t14.data <- persdocmod(t14.data)
t15.data <- persdocmod(t15.data)

# Stack the five years into one pooled data set.
allyears <- rbind(t11.data, t12.data, t13.data, t14.data, t15.data)


### Create Summary Tables with Weighted Values ###


library(dplyr)

# Weighted cross-tabulation of hlthpln1 (rows) by persdoc2 (columns).
#
# Cell values are sums of x.llcpwt, rescaled by (number of rows in x) /
# (total weight) so that the whole table adds up to the number of rows.
#
# x: data frame with columns hlthpln1, persdoc2 and x.llcpwt.
# Returns an ungrouped table with hlthpln1 in the first column and one
# column per persdoc2 value.
persdoc.table <- function(x){

      weighted <- summarize(group_by(x, hlthpln1, persdoc2),
                            wt_sum = sum(x.llcpwt))
      wide <- ungroup(tidyr::spread(weighted, key = persdoc2, value = wt_sum))

      rescale <- length(x$hlthpln1)/sum(x$x.llcpwt)

      value_cols <- 2:ncol(wide)
      wide[,value_cols] <- wide[,value_cols]*rescale

      wide
}

# Weighted hlthpln1-by-persdoc2 tables, one per survey year plus one for the
# pooled data.
ST_PD_t11 <- persdoc.table(t11.data)
ST_PD_t12 <- persdoc.table(t12.data)
ST_PD_t13 <- persdoc.table(t13.data)
ST_PD_t14 <- persdoc.table(t14.data)
ST_PD_t15 <- persdoc.table(t15.data)
ST_PD_ay <- persdoc.table(allyears)

# Weighted cross-tabulation of hlthpln1 (rows) by checkup1 (columns).
#
# Cell values are sums of x.llcpwt, rescaled by (number of rows in x) /
# (total weight) so that the whole table adds up to the number of rows.
#
# x: data frame with columns hlthpln1, checkup1 and x.llcpwt.
# Returns an ungrouped table with hlthpln1 in the first column and one
# column per checkup1 value.
checkup.table <- function(x){

      weighted <- summarize(group_by(x, hlthpln1, checkup1),
                            wt_sum = sum(x.llcpwt))
      wide <- ungroup(tidyr::spread(weighted, key = checkup1, value = wt_sum))

      rescale <- length(x$hlthpln1)/sum(x$x.llcpwt)

      value_cols <- 2:ncol(wide)
      wide[,value_cols] <- wide[,value_cols]*rescale

      wide
}

# Weighted hlthpln1-by-checkup1 tables, one per survey year plus one for the
# pooled data.
ST_CU_t11 <- checkup.table(t11.data)
ST_CU_t12 <- checkup.table(t12.data)
ST_CU_t13 <- checkup.table(t13.data)
ST_CU_t14 <- checkup.table(t14.data)
ST_CU_t15 <- checkup.table(t15.data)
ST_CU_ay <- checkup.table(allyears)

# The same tables restricted to rows with persdoc2 == 1.
ST_CUgPD_t11 <- checkup.table(t11.data[t11.data$persdoc2 == 1,])
ST_CUgPD_t12 <- checkup.table(t12.data[t12.data$persdoc2 == 1,])
ST_CUgPD_t13 <- checkup.table(t13.data[t13.data$persdoc2 == 1,])
ST_CUgPD_t14 <- checkup.table(t14.data[t14.data$persdoc2 == 1,])
ST_CUgPD_t15 <- checkup.table(t15.data[t15.data$persdoc2 == 1,])
ST_CUgPD_ay <- checkup.table(allyears[allyears$persdoc2 == 1,])

```


```{r, include=F}

# Row proportions for a table whose counts sit in columns 2 and 3.
#
# x: table (data frame or tibble) with at least two rows; column 1 is
#    ignored, columns 2 and 3 hold the counts.
# Returns a 2 x 2 numeric matrix without dimnames: entry [i, j] is the
# share of row i's total (columns 2-3) that falls in count column j.
PD.props <- function(x){
      share_of_row <- function(r){
            row_total <- sum(x[r,2:3])
            vapply(2:3, function(k) as.numeric(x[r,k]/row_total), numeric(1))
      }
      rbind(share_of_row(1), share_of_row(2))
}


# Row-proportion matrices for each survey year and for the pooled data.
PD11 <- PD.props(ST_PD_t11)
PD12 <- PD.props(ST_PD_t12)
PD13 <- PD.props(ST_PD_t13)
PD14 <- PD.props(ST_PD_t14)
PD15 <- PD.props(ST_PD_t15)
PDay <- PD.props(ST_PD_ay)

# First column of every proportion matrix side by side, followed by an
# all-zero "Empty" column.
PD.proportions <- cbind(PD11[,1], PD12[,1], PD13[,1], PD14[,1], PD15[,1], PDay[,1],
                        c(0, 0))
dimnames(PD.proportions) <- list(
      c("Insured", "Uninsured"),
      c("2011", "2012", "2013", "2014", "2015", "All Years", "Empty")
)

```

```{r, include=F}

# Row proportions for a table whose counts sit in columns 2 through 6.
#
# x: table (data frame or tibble) with at least two rows; column 1 is
#    ignored, columns 2-6 hold the counts.
# Returns a 2 x 5 numeric matrix without dimnames: entry [i, j] is the
# share of row i's total (columns 2-6) that falls in count column j.
CU.props <- function(x){

      by_row <- vapply(1:2, function(r){
            row_total <- sum(x[r,2:6])
            vapply(2:6, function(k) as.numeric(x[r,k]/row_total), numeric(1))
      }, numeric(5))

      # vapply stacks each row's result as a column, so transpose.
      t(by_row)
}

# Row proportions for a table whose counts sit in columns 2 through 6.
# Same computation as CU.props; applied in this file to the tables
# restricted to persdoc2 == 1.
#
# x: table (data frame or tibble) with at least two rows; column 1 is
#    ignored, columns 2-6 hold the counts.
# Returns a 2 x 5 numeric matrix without dimnames of within-row shares.
CUgPD.props <- function(x){

      shares <- matrix(0, nrow = 2, ncol = 5)

      for(r in 1:2){
            row_total <- sum(x[r,2:6])
            for(k in 2:6){
                  # Count column k lands in output column k - 1.
                  shares[r,k-1] <- as.numeric(x[r,k]/row_total)
            }
      }

      shares
}

# Row-proportion matrices for the checkup tables restricted to
# persdoc2 == 1, per year and pooled.
CUgPD11 <- CUgPD.props(ST_CUgPD_t11)
CUgPD12 <- CUgPD.props(ST_CUgPD_t12)
CUgPD13 <- CUgPD.props(ST_CUgPD_t13)
CUgPD14 <- CUgPD.props(ST_CUgPD_t14)
CUgPD15 <- CUgPD.props(ST_CUgPD_t15)
CUgPDay <- CUgPD.props(ST_CUgPD_ay)

# Row-proportion matrices for the unrestricted checkup tables, per year and
# pooled.
CU11 <- CU.props(ST_CU_t11)
CU12 <- CU.props(ST_CU_t12)
CU13 <- CU.props(ST_CU_t13)
CU14 <- CU.props(ST_CU_t14)
CU15 <- CU.props(ST_CU_t15)
CUay <- CU.props(ST_CU_ay)

# First checkup column of each proportion matrix, one matrix column per
# period.
vec <- unlist(lapply(list(CU11, CU12, CU13, CU14, CU15, CUay),
                     function(m) m[,1]))

DU.l1.proportions <- matrix(vec, nrow = 2, ncol = 6)
dimnames(DU.l1.proportions) <- list(
      c("Insured", "Uninsured"),
      c("2011", "2012", "2013", "2014", "2015", "All")
)

# Checkup columns 4 and 5 summed within each row, one matrix column per
# period, followed by two zeros that fill an unlabeled seventh column.
vec <- c(unlist(lapply(list(CU11, CU12, CU13, CU14, CU15, CUay),
                       function(m) c(sum(m[1,4:5]), sum(m[2,4:5])))),
         0, 0)

DU.g1.proportions <- matrix(vec, nrow = 2, ncol = 7)
dimnames(DU.g1.proportions) <- list(
      c("Insured", "Uninsured"),
      c("2011", "2012", "2013", "2014", "2015", "All", "")
)


### Now for doctor utilization given personal doctor

# First checkup column of each restricted proportion matrix, one matrix
# column per period.
vec <- unlist(lapply(list(CUgPD11, CUgPD12, CUgPD13, CUgPD14, CUgPD15, CUgPDay),
                     function(m) m[,1]))

DUgPD.l1.proportions <- matrix(vec, nrow = 2, ncol = 6)
dimnames(DUgPD.l1.proportions) <- list(
      c("Insured", "Uninsured"),
      c("2011", "2012", "2013", "2014", "2015", "All")
)

# Checkup columns 4 and 5 summed within each row, one matrix column per
# period, followed by two zeros that fill an unlabeled seventh column.
vec <- c(unlist(lapply(list(CUgPD11, CUgPD12, CUgPD13, CUgPD14, CUgPD15, CUgPDay),
                       function(m) c(sum(m[1,4:5]), sum(m[2,4:5])))),
         0, 0)

DUgPD.g1.proportions <- matrix(vec, nrow = 2, ncol = 7)
dimnames(DUgPD.g1.proportions) <- list(
      c("Insured", "Uninsured"),
      c("2011", "2012", "2013", "2014", "2015", "All", "")
)

```


```{r, include=F}
### Calculating pearson statistics

# One chisq.test() per table; each contributes the statistic, a fixed 1
# degree of freedom, and the p-value.
# NOTE(review): if these are 2 x 2 tables, chisq.test() applies its default
# continuity correction (correct = TRUE) - confirm that this is the
# statistic the report intends to label "Pearson".
vec <- unlist(lapply(
  list(ST_PD_t11, ST_PD_t12, ST_PD_t13, ST_PD_t14, ST_PD_t15, ST_PD_ay),
  function(tab) {
    fit <- chisq.test(tab[,2:3])
    c(as.numeric(fit$statistic), 1, as.numeric(fit$p.value))
  }
))

pcs.PD <- matrix(vec, nrow = 6, ncol = 3, byrow = TRUE,
                 dimnames = list(
                   c("2011", "2012", "2013", "2014", "2015", "All Years"),
                   c("Test Statistic", "Degrees of Freedom", "P-Value")
                 ))

### Calculating concordance statistics

# Rank-correlation measures for a two-row contingency table.
#
# Arguments:
#   x      Table of counts (matrix, data frame or tibble) whose first two
#          rows are the groups being compared and whose columns are the
#          ordered response categories. Counts may be non-integer.
#   alpha  Significance level; the interval bounds use the normal quantile
#          at 1 - alpha/2.
#
# Returns a 3 x 7 numeric matrix. Rows: "Tau.a", "Tau.b", "Gamma". Columns:
# lower bound, estimate, upper bound, test statistic, p-value, alternative
# test statistic (estimate / large-sample SE) and its p-value. The two
# alternative columns are NA for gamma. Every p-value is the upper-tail
# normal probability of |Z|, i.e. one-tailed.
Rank.Correlation <- function(x, alpha = 0.05){

  # Work on a plain double matrix. Integer input would otherwise overflow in
  # the pair products below (giving NA), and tibble input would turn every
  # intermediate into a 1 x 1 data frame and the result into a list-matrix.
  x <- as.matrix(x)
  storage.mode(x) <- "double"

  z <- qnorm(alpha/2, lower.tail = FALSE)
  cols <- ncol(x)

  row.tot <- c(sum(x[1,]), sum(x[2,]))
  col.tot <- unname(colSums(x))

  n <- row.tot[1] + row.tot[2]
  np <- (n*(n-1))/2
  # Large-sample standard error of tau; used for both tau intervals and for
  # the alternative test statistics.
  SE.LS.approx <- sqrt((2*(2*n+5))/(9*n*(n-1)))

  # Pairs tied on the row variable and on the column variable.
  Tied.r <- sum(row.tot*(row.tot-1))/2
  Tied.c <- sum(col.tot*(col.tot-1))/2

  # Concordant pairs: row 1 in an earlier column than row 2.
  # Discordant pairs: row 2 in an earlier column than row 1.
  C.pairs <- 0
  D.pairs <- 0
  for(i in seq_len(cols - 1)){
    later <- (i + 1):cols
    C.pairs <- C.pairs + x[1,i]*sum(x[2,later])
    D.pairs <- D.pairs + x[2,i]*sum(x[1,later])
  }

  # Tie-correction terms for the variance of (C - D): column margins.
  s.c <- col.tot*(col.tot-1)
  vt <- sum(s.c*(2*col.tot+5))
  v1.c <- sum(s.c)
  v2.c <- sum(s.c*(col.tot-2))

  # Tie-correction terms for the variance of (C - D): row margins.
  s.r <- row.tot*(row.tot-1)
  vu <- sum(s.r*(2*row.tot+5))
  v1.r <- sum(s.r)
  v2.r <- sum(s.r*(row.tot-2))

  v1 <- v1.c*v1.r/(2*n*(n-1))
  v2 <- v2.c*v2.r/(9*n*(n-1)*(n-2))

  v0 <- n*(n-1)*(2*n+5)

  v <- (v0 - vt - vu) / (18) + v1 + v2

  Ktau.a <- (C.pairs - D.pairs) / np
  Ktau.b <- (C.pairs - D.pairs) / sqrt( (np - Tied.r)*(np - Tied.c) )

  Z.a <- 3*(C.pairs - D.pairs) / sqrt(n*(n-1)*(2*n+5)/2)
  pva <- pnorm(abs(Z.a), lower.tail = FALSE)
  Z.a.Alt <- Ktau.a/SE.LS.approx
  pva.Alt <- pnorm(abs(Z.a.Alt), lower.tail = FALSE)

  Ta.LB <- Ktau.a - z*SE.LS.approx
  Ta.UB <- Ktau.a + z*SE.LS.approx

  Z.b <- (C.pairs - D.pairs) / sqrt(v)
  pvb <- pnorm(abs(Z.b), lower.tail = FALSE)
  Z.b.Alt <- Ktau.b/SE.LS.approx
  pvb.Alt <- pnorm(abs(Z.b.Alt), lower.tail = FALSE)

  Tb.LB <- Ktau.b - z*SE.LS.approx
  Tb.UB <- Ktau.b + z*SE.LS.approx

  gamma <- (C.pairs - D.pairs)/(C.pairs + D.pairs)
  SE.gamma <- sqrt(n*(1-gamma^2)/(C.pairs+D.pairs))
  Z.gamma <- gamma/SE.gamma
  pvg <- pnorm(abs(Z.gamma), lower.tail = FALSE)

  G.LB <- gamma - z*SE.gamma
  G.UB <- gamma + z*SE.gamma

  # Filled column by column; the duplicated "p.val" column name is kept
  # because callers index this matrix by position.
  out <- matrix(c(Ta.LB, Tb.LB, G.LB, Ktau.a, Ktau.b, gamma, Ta.UB, Tb.UB, G.UB, Z.a, Z.b, Z.gamma,
                  pva, pvb, pvg, Z.a.Alt, Z.b.Alt, NA, pva.Alt, pvb.Alt, NA),
                nrow = 3, ncol = 7,
                dimnames = list(c("Tau.a", "Tau.b", "Gamma"),
                                c("Lower.Bound","Estimate", "Upper Bound", "TS", "p.val", "Alt.TS", "p.val")))

  out
}

# Rank-correlation statistics for columns 2-3 of each personal-doctor table,
# per year and pooled.
roc.pd11 <- Rank.Correlation(ST_PD_t11[,2:3])
roc.pd12 <- Rank.Correlation(ST_PD_t12[,2:3])
roc.pd13 <- Rank.Correlation(ST_PD_t13[,2:3])
roc.pd14 <- Rank.Correlation(ST_PD_t14[,2:3])
roc.pd15 <- Rank.Correlation(ST_PD_t15[,2:3])
roc.pday <- Rank.Correlation(ST_PD_ay[,2:3])

```

```{r, echo=FALSE,results='asis'}

### Table 2, concordance statistics

# Stack the Tau-b and Gamma rows (rows 2-3, columns 1-5, rounded to five
# digits) of every personal-doctor rank-correlation result into a single
# character matrix with two rows per period.
rcts.PD <- local({
  results <- list(roc.pd11, roc.pd12, roc.pd13, roc.pd14, roc.pd15, roc.pday)

  pieces <- lapply(results, function(roc) {
    stats <- lapply(1:5, function(k) round(as.numeric(roc[2:3,k]), digits = 5))
    do.call(cbind, c(list(c("Tau-b", "Gamma")), stats))
  })

  stacked <- do.call(rbind, pieces)
  dimnames(stacked) <- list(
    c("2011", "", "2012", "", "2013", "", "2014", "", "2015", "", "All", "Years"),
    c("", "95% Lower Bound", "Estimate", "95% Upper Bound", "Test Statistic", "P-value")
  )
  stacked
})

```

```{r, echo=F, results="asis"}

### Calculate Pearson statistics

# One chisq.test() per checkup table (columns 2-6); each contributes the
# statistic, a fixed 4 degrees of freedom, and the p-value.
vec <- unlist(lapply(
  list(ST_CU_t11, ST_CU_t12, ST_CU_t13, ST_CU_t14, ST_CU_t15, ST_CU_ay),
  function(tab) {
    fit <- chisq.test(tab[,2:6])
    c(as.numeric(fit$statistic), 4, as.numeric(fit$p.value))
  }
))

pcs.CU <- matrix(vec, nrow = 6, ncol = 3, byrow = TRUE,
                 dimnames = list(
                   c("2011", "2012", "2013", "2014", "2015", "All Years"),
                   c("Test Statistic", "Degrees of Freedom", "P-Value")
                 ))

# The same statistics for the tables restricted to persdoc2 == 1.
vec <- unlist(lapply(
  list(ST_CUgPD_t11, ST_CUgPD_t12, ST_CUgPD_t13, ST_CUgPD_t14, ST_CUgPD_t15, ST_CUgPD_ay),
  function(tab) {
    fit <- chisq.test(tab[,2:6])
    c(as.numeric(fit$statistic), 4, as.numeric(fit$p.value))
  }
))

pcs.CUgPD <- matrix(vec, nrow = 6, ncol = 3, byrow = TRUE,
                    dimnames = list(
                      c("2011", "2012", "2013", "2014", "2015", "All Years"),
                      c("Test Statistic", "Degrees of Freedom", "P-Value")
                    ))

```

```{r, echo=F, results="asis"}

### Calculate rank correlation statistics

# Rank-correlation statistics for columns 2-6 of each checkup table.
roc.CU11 <- Rank.Correlation(ST_CU_t11[,2:6])
roc.CU12 <- Rank.Correlation(ST_CU_t12[,2:6])
roc.CU13 <- Rank.Correlation(ST_CU_t13[,2:6])
roc.CU14 <- Rank.Correlation(ST_CU_t14[,2:6])
roc.CU15 <- Rank.Correlation(ST_CU_t15[,2:6])
roc.CUay <- Rank.Correlation(ST_CU_ay[,2:6])

# Stack the Tau-b and Gamma rows (rows 2-3, columns 1-5, rounded to five
# digits) of every result into one character matrix, two rows per period.
rcts.CU <- local({
  results <- list(roc.CU11, roc.CU12, roc.CU13, roc.CU14, roc.CU15, roc.CUay)

  pieces <- lapply(results, function(roc) {
    stats <- lapply(1:5, function(k) round(as.numeric(roc[2:3,k]), digits = 5))
    do.call(cbind, c(list(c("Tau-b", "Gamma")), stats))
  })

  stacked <- do.call(rbind, pieces)
  dimnames(stacked) <- list(
    c("2011", "", "2012", "", "2013", "", "2014", "", "2015", "", "All", "Years"),
    c("", "95% Lower Bound", "Estimate", "95% Upper Bound", "Test Statistic", "P-value")
  )
  stacked
})


# Rank-correlation statistics for columns 2-6 of each checkup table
# restricted to persdoc2 == 1.
roc.CUgPD11 <- Rank.Correlation(ST_CUgPD_t11[,2:6])
roc.CUgPD12 <- Rank.Correlation(ST_CUgPD_t12[,2:6])
roc.CUgPD13 <- Rank.Correlation(ST_CUgPD_t13[,2:6])
roc.CUgPD14 <- Rank.Correlation(ST_CUgPD_t14[,2:6])
roc.CUgPD15 <- Rank.Correlation(ST_CUgPD_t15[,2:6])
roc.CUgPDay <- Rank.Correlation(ST_CUgPD_ay[,2:6])

# Stack the Tau-b and Gamma rows (rows 2-3, columns 1-5, rounded to five
# digits) of every result into one character matrix, two rows per period.
rcts.CUgPD <- local({
  results <- list(roc.CUgPD11, roc.CUgPD12, roc.CUgPD13,
                  roc.CUgPD14, roc.CUgPD15, roc.CUgPDay)

  pieces <- lapply(results, function(roc) {
    stats <- lapply(1:5, function(k) round(as.numeric(roc[2:3,k]), digits = 5))
    do.call(cbind, c(list(c("Tau-b", "Gamma")), stats))
  })

  stacked <- do.call(rbind, pieces)
  dimnames(stacked) <- list(
    c("2011", "", "2012", "", "2013", "", "2014", "", "2015", "", "All", "Years"),
    c("", "95% Lower Bound", "Estimate", "95% Upper Bound", "Test Statistic", "P-value")
  )
  stacked
})

```


```{r, include=F}

# Difference between the row-1 and row-2 proportions of column 2, with a
# normal-approximation confidence interval.
#
# x:     table whose columns 2 and 3 hold the counts (column 1 is ignored).
# alpha: significance level; the interval uses the normal quantile at
#        1 - alpha/2.
# Returns c(lower bound, difference, upper bound), where the difference is
# row 1's share of column 2 minus row 2's.
PD.propdif <- function(x, alpha = 0.05){

  crit <- qnorm(alpha/2, lower.tail = FALSE)

  size <- c(as.numeric( sum(x[1,2:3]) ),
            as.numeric( sum(x[2,2:3]) ))
  share <- c(as.numeric( x[1,2]/sum(x[1,2:3]) ),
             as.numeric( x[2,2]/sum(x[2,2:3]) ))

  gap <- share[1] - share[2]
  half_width <- crit*sqrt( (share[1]*(1-share[1]))/size[1] + (share[2]*(1-share[2]))/size[2] )

  c(gap - half_width, gap, gap + half_width)
}

# Lower bound, estimate and upper bound of the proportion difference for
# each year and for the pooled data, concatenated in that order.
PD.pd <- unlist(lapply(
  list(ST_PD_t11, ST_PD_t12, ST_PD_t13, ST_PD_t14, ST_PD_t15, ST_PD_ay),
  PD.propdif, alpha = 0.05
))

# One row per period.
Persdoc.pdif.95CI <- matrix(PD.pd, nrow = 6, ncol = 3, byrow = TRUE)
dimnames(Persdoc.pdif.95CI) <- list(
  c("2011", "2012", "2013", "2014", "2015", "All"),
  c("Lower Bound", "Estimate", "Upper Bound")
)

```

```{r, echo=F, results="asis"}

# Relative risk of row 1 versus row 2 for the column(s) of interest, with a
# confidence interval computed on the log scale.
#
# x:     table of counts (matrix, data frame or tibble); rows 1 and 2 are
#        the two groups and every column of x counts towards the row total.
# coi:   index (or indices) of the column(s) of interest. When several are
#        given, their counts are added to form a single event count.
# alpha: significance level; the interval uses the normal quantile at
#        1 - alpha/2.
# Returns a plain numeric vector c(lower bound, relative risk, upper bound).
Relative.Risk <- function(x, coi = c(1), alpha = 0.05){

  z <- qnorm(alpha/2, lower.tail = FALSE)

  n1 <- as.numeric( sum(x[1,]) )
  n2 <- as.numeric( sum(x[2,]) )

  # sum() + as.numeric() reduce the selected cells to one plain number. A
  # tibble cell is itself a 1 x 1 table, which would otherwise propagate
  # through the arithmetic and make the result a list.
  events1 <- as.numeric( sum(x[1,coi]) )
  events2 <- as.numeric( sum(x[2,coi]) )

  p1 <- events1/n1
  p2 <- events2/n2

  rr <- p1/p2
  # Standard error of log(rr).
  v <- sqrt( (1-p1)/events1 + (1-p2)/events2 )

  LB <- rr*exp(-z*v)
  UB <- rr*exp(z*v)

  c(LB, rr, UB)

}

# Relative-risk interval (lower bound, estimate, upper bound) from columns
# 2-3 of each personal-doctor table, one row per period.
PD.RR <- matrix(
  do.call(c, lapply(
    list(ST_PD_t11, ST_PD_t12, ST_PD_t13, ST_PD_t14, ST_PD_t15, ST_PD_ay),
    function(tab) Relative.Risk(tab[,2:3])
  )),
  nrow = 6, ncol = 3, byrow = TRUE,
  dimnames = list(c("2011", "2012", "2013", "2014", "2015", "All Years"),
                  c("Lower Bound", "Estimate", "Upper Bound"))
)

```

```{r, echo=F, results="asis"}

# Absolute differences and relative risks, each with a confidence interval,
# for two outcomes of a two-row, five-column table of counts:
#   * column 1, compared as row 1 versus row 2;
#   * columns 4 and 5 combined, compared as row 2 versus row 1.
#
# x:     table of counts with at least two rows and five columns; every
#        column counts towards the row totals.
# alpha: significance level; intervals use the normal quantile at
#        1 - alpha/2.
# Returns a numeric vector of length 12, in (lower, estimate, upper)
# triples: column-1 difference, column-1 relative risk, columns-4:5
# difference, columns-4:5 relative risk.
CU.AbandRR <- function(x, alpha = 0.05){

  crit <- qnorm(alpha/2, lower.tail = FALSE)

  # Difference pa - pb with a normal-approximation interval.
  diff_ci <- function(pa, size_a, pb, size_b){
    d <- pa - pb
    se <- sqrt( (pa*(1-pa))/size_a + (pb*(1-pb))/size_b )
    c(d - crit*se, d, d + crit*se)
  }

  # Ratio pa / pb with an interval built on the log scale.
  ratio_ci <- function(pa, count_a, pb, count_b){
    r <- pa / pb
    se_log <- sqrt( (1-pa)/count_a + (1-pb)/count_b )
    c(r*exp(-crit*se_log), r, r*exp(crit*se_log))
  }

  total <- c(as.numeric( sum(x[1,]) ), as.numeric( sum(x[2,]) ))
  first_col <- c(as.numeric(x[1,1]), as.numeric(x[2,1]))
  last_cols <- c(as.numeric(sum(x[1,4:5])), as.numeric(sum(x[2,4:5])))

  p_first <- first_col / total
  p_last <- last_cols / total

  c(diff_ci(p_first[1], total[1], p_first[2], total[2]),
    ratio_ci(p_first[1], first_col[1], p_first[2], first_col[2]),
    diff_ci(p_last[2], total[2], p_last[1], total[1]),
    ratio_ci(p_last[2], last_cols[2], p_last[1], last_cols[1]))
}

# Twelve interval values per period (four lower/estimate/upper triples)
# from columns 2-6 of each checkup table.
DU <- matrix(
  do.call(c, lapply(
    list(ST_CU_t11, ST_CU_t12, ST_CU_t13, ST_CU_t14, ST_CU_t15, ST_CU_ay),
    function(tab) CU.AbandRR(tab[,2:6])
  )),
  nrow = 6, ncol = 12, byrow = TRUE,
  dimnames = list(c("2011", "2012", "2013", "2014", "2015", "All Years"),
                  rep(c("Lower Bound", "Estimate", "Upper Bound"), times = 4))
)

# The same layout for the tables restricted to persdoc2 == 1.
DUgPD <- matrix(
  do.call(c, lapply(
    list(ST_CUgPD_t11, ST_CUgPD_t12, ST_CUgPD_t13,
         ST_CUgPD_t14, ST_CUgPD_t15, ST_CUgPD_ay),
    function(tab) CU.AbandRR(tab[,2:6])
  )),
  nrow = 6, ncol = 12, byrow = TRUE,
  dimnames = list(c("2011", "2012", "2013", "2014", "2015", "All Years"),
                  rep(c("Lower Bound", "Estimate", "Upper Bound"), times = 4))
)

# Pooled ("All Years") row only: the first two triples of DU, then the first
# two triples of DUgPD, rounded to five digits.
pdif.DU1ay <- round(rbind(DU[6,1:3],
                          DU[6,4:6],
                          DUgPD[6,1:3],
                          DUgPD[6,4:6]),
                    digits = 5)

```


# Appendix

## Additional Tables and Figures

### Pearson and Concordance Statistics Tables

```{r echo=FALSE,results='asis'}
library(pander)
# Set pander's caption-prefix option to NULL (presumably so that no
# automatic prefix is added; the captions carry their own "Table A<n>"
# labels).
panderOptions("table.caption.prefix", NULL)

### Table A1, Pearson statistics for the personal-doctor tables

pandoc.table(
  pcs.PD,
  caption = "**Table A1**: The Pearson chi-squared test-statistics for the general association between insurance status and having a personal doctor. The large test-statistics and p-values of virtually zero indicate that the association between insurance status and whether or not an individual has a personal doctor is highly statistically significant.",
  split.table = Inf
)

```

***
<br>

```{r, echo=F, results="asis"}
# Table A2: rank-correlation statistics for the personal-doctor tables.
pandoc.table(
  rcts.PD,
  caption = "**Table A2**: The rank correlation statistics for the relationship between insurance status and having a personal doctor. The positive tau-b and gamma statistics indicate that having insurance is correlated with having a personal doctor. While the large test-statistics and p-values of virtually zero indicate that this correlation is highly statistically significant, the magnitude of the tau-b statistic indicates that the strength of the correlation is moderate.",
  split.table = Inf
)
```

***
<br>

```{r, echo=F, results="asis"}
# Table A3: Pearson statistics for the checkup tables.
pandoc.table(
  pcs.CU,
  caption = "**Table A3**: The Pearson chi-squared test-statistics for the general association between insurance status and time since last general health checkup (doctor utilization). The large test-statistics and p-values of virtually zero indicate that the association between insurance status and doctor utilization is highly statistically significant for every year.",
  split.table = Inf
)
```

***
<br>

```{r, echo=F, results="asis"}

# Table A4: Pearson statistics for the checkup tables restricted to
# respondents with a personal doctor.
pandoc.table(
  pcs.CUgPD,
  caption = "**Table A4**: The Pearson chi-squared test-statistics for the general association between insurance status and doctor utilization when only considering individuals who reported having a personal doctor. Although the test-statistics are smaller than those observed when looking at all individuals, they are still very large and the p-values are still virtually zero, indicating that the association between insurance status and doctor utilization is still highly statistically significant.",
  split.table = Inf
)

```

***
<br>

```{r, echo=F, results="asis"}
# Table A5: rank-correlation statistics for the checkup tables.
# The caption previously rendered the misspelling "utilizationt".
pandoc.table(
  rcts.CU,
  caption = "**Table A5**: The rank correlation statistics for the relationship between insurance status and time since last general health checkup (doctor utilization). The positive tau-b and gamma statistics indicate that having insurance is correlated with having a checkup more recently. While the large test-statistics and p-values of virtually zero indicate that this correlation is highly statistically significant, the magnitude of the tau-b statistic indicates that the strength of the correlation is not very strong.",
  split.table = Inf
)
```

***
<br>

```{r, echo=F, results="asis"}
# Table A6: rank-correlation statistics for the checkup tables restricted to
# respondents with a personal doctor.
# The caption previously rendered "cosidering" and "the magnitude ... are".
pandoc.table(
  rcts.CUgPD,
  caption = "**Table A6**: The rank correlation statistics for the relationship between insurance status and doctor utilization when considering only individuals who reported having a personal doctor. Again, the positive tau-b and gamma statistics (and the p-values of approximately zero) indicate that having insurance is correlated with having a checkup more recently. However, the magnitudes of the tau-b statistics are less than those observed when considering all individuals. Thus the strength of the correlation appears weaker when considering only individuals who reported having a personal doctor.",
  split.table = Inf
)
```

***
<br>
<br>

### Proportional Differences

```{r, echo=F, results="asis"}
# Table A7: absolute proportion differences for having a personal doctor.
pandoc.table(
  Persdoc.pdif.95CI,
  caption = "**Table A7:** 95% Confidence intervals for the absolute differences between the proportion of insured individuals with a doctor and uninsured individuals with a doctor. Note that although the absolute differences appear to decrease from 2011 to 2015, the confidence intervals for consecutive years overlap.",
  digits = 5,
  split.table = Inf
)
```

***
<br>

```{r, echo=F, results="asis"}
# Table A8: relative risks for having a personal doctor.
pandoc.table(
  PD.RR,
  caption = "**Table A8:** 95% Confidence intervals for the relative differences between the proportion of insured individuals with a doctor and uninsured individuals with a doctor.",
  digits = 5,
  split.table = Inf
)
```

***
<br>

```{r, echo=F, results="asis"}
# Table A9: columns 1-3 of DU (first difference triple).
pandoc.table(
  DU[,1:3],
  caption = "**Table A9:** 95% Confidence intervals for the absolute differences between the proportion of insured individuals who have had a checkup within a year and the proportion of uninsured individuals that have. Note that although the differences are very close from year to year they do generally decrease from 2011 to 2015.",
  digits = 5,
  split.table = Inf
)
```

***
<br>

```{r, echo=F, results="asis"}
# Table A10: columns 1-3 of DUgPD (first difference triple).
# The caption previously rendered "having personal doctor" without the
# article.
pandoc.table(
  DUgPD[,1:3],
  caption = "**Table A10:** 95% Confidence intervals for the absolute differences between the proportion of insured and uninsured individuals who have had a checkup within a year, only considering individuals who reported having a personal doctor. Note that although the differences are very close from year to year they do generally decrease from 2011 to 2015.",
  digits = 5,
  split.table = Inf
)
```

***
<br>

```{r, echo=F, results="asis"}
# Table A11: columns 4-6 of DU (first relative-risk triple).
pandoc.table(
  DU[,4:6],
  caption = "**Table A11:** 95% Confidence intervals for the relative differences between the proportion of insured individuals who have had a checkup within a year and the proportion of uninsured individuals that have. Note that although the relative differences are close from year to year they do generally decrease from 2011 to 2015.",
  digits = 5,
  split.table = Inf
)
```

***
<br>

```{r, echo=F, results="asis"}
# Table A12: columns 4-6 of DUgPD (first relative-risk triple).
pandoc.table(
  DUgPD[,4:6],
  caption = "**Table A12:** 95% Confidence intervals for the relative differences between the proportion of individuals who have had a checkup within a year, only considering individuals who reported having a personal doctor. Note that although the relative differences are close from year to year they do generally decrease from 2011 to 2015.",
  digits = 5,
  split.table = Inf
)
```

***
<br>

```{r, echo=F, results="asis"}
# Table A13: columns 7-9 of DU (second difference triple).
pandoc.table(
  DU[,7:9],
  caption = "**Table A13:** 95% Confidence intervals for the absolute differences between the proportion of uninsured individuals who have not had a checkup within five years and the proportion of insured individuals that have not.",
  digits = 5,
  split.table = Inf
)
```

***
<br>

```{r, echo=F, results="asis"}
# Table A14: columns 7-9 of DUgPD (second difference triple).
pandoc.table(
  DUgPD[,7:9],
  caption = "**Table A14:** 95% Confidence intervals for the absolute differences between the proportion of individuals who have not had a checkup within five years, only considering individuals who reported having a personal doctor.",
  digits = 5,
  split.table = Inf
)
```

***
<br>
```{r, echo=F, results="asis"}
# Table A15: columns 10-12 of DU (second relative-risk triple).
pandoc.table(
  DU[,10:12],
  caption = "**Table A15:** 95% Confidence intervals for the relative differences between the proportion of uninsured individuals who have not had a checkup within five years and the proportion of insured individuals that have not.",
  digits = 5,
  split.table = Inf
)
```

***
<br>

```{r, echo=F, results="asis"}
# Table A16: columns 10-12 of DUgPD (second relative-risk triple).
pandoc.table(
  DUgPD[,10:12],
  caption = "**Table A16:** 95% Confidence intervals for the relative differences between the proportion of individuals who have not had a checkup within five years, only considering individuals who reported having a personal doctor.",
  digits = 5,
  split.table = Inf
)
```

***
<br>

### Data-Set Cleaning
```{r, echo=F, results="asis"}

# Load the cleaning summary and give its columns display-ready headers
# before rendering it as Table A17.
removed.vals <- setNames(
  data.frame(read.csv("removedvaluestable.csv")),
  c(
    "Data Set Year",
    "Total Before Cleaning",
    "Total After Cleaning",
    "Number Removed",
    "Percentage Removed"
  )
)

pandoc.table(
  removed.vals,
  caption = "**Table A17:** Values removed from the data sets during cleaning.",
  split.table = Inf
)

```

## Technical Section: Example Equations and Code

  In order to demonstrate how the nation level analysis was done, example calculations for the personal doctor measure are shown below.
  
  <br>
  
### Pearson Chi-Squared Test

  The Pearson chi-squared test determines whether or not the distribution of the response variable (in this case whether or not an individual has a personal doctor) is the same at both levels of the explanatory variable (insurance status). The null hypothesis is that there is no association between the explanatory and response variable and thus the distribution of the response variable is the same at both levels of the explanatory variable. The alternative hypothesis is that the distribution of the response variable is not the same at both levels of the explanatory variable and thus there is an association between the explanatory and response variables.
  
  This test is performed with a frequency table. The test-statistic is the sum, over all cells of the table, of the squared difference between the observed and expected values divided by the expected value of that cell. Under the null hypothesis, this test-statistic has an approximate chi-square distribution with degrees of freedom equal to the number of rows minus one times the number of columns minus one:

  $$X^2 = \sum_{i=1}^n {\frac{(observed - expected)^2}{expected}}   \sim \chi^2_{(r-1)(c-1)}$$

  We can easily calculate this statistic in R using the "chisq.test()" function, which takes as an argument the contingency table we are interested in. The function returns the Pearson test-statistic, the degrees of freedom, and the p-value. It can also return the observed and expected tables if requested.
  
```{r}

# ST_PD_t11[, 2:3] holds the frequency table of insurance status against
# whether or not an individual has a personal doctor.
# NOTE(review): if this table is 2 x 2, chisq.test() applies Yates' continuity
# correction by default (correct = TRUE) - confirm that is the intended test.

# Fit the test once and keep the result so its components can be inspected.
pcs <- chisq.test(ST_PD_t11[,2:3])

# Full printed summary of the test
pcs

# Test statistic
pcs[["statistic"]]

# Degrees of freedom
pcs[["parameter"]]

# p-value
pcs[["p.value"]]

# Observed counts
pcs[["observed"]]

# Expected counts under the null hypothesis
pcs[["expected"]]

```

  We used this function to perform the Pearson chi-squared tests for all of our measures of interest.
  
  <br>

### Rank Correlation Statistics

  The rank correlation statistics, or concordance statistics, indicate the direction of association between two ordinal variables. The Kendall's tau-b and Goodman and Kruskal's gamma statistics range from -1 to 1, with negative values indicating negative correlation between the two variables and positive values indicating positive correlation between the two variables. A negative 1 indicates perfect negative correlation, a positive 1 indicates perfect positive correlation, and a 0 indicates no correlation. These statistics are calculated using concordant, discordant and (for the tau-b statistic) tied pairs of observations. A concordant pair of observations is a pair for which one observation ranks higher in both variables than the other observation. A discordant pair of observations is when one observation ranks higher in one variable but lower in the other variable compared to the other observation. A tied pair of observations is when the pair of observations tie for rank in one or both variables. For instance, if we were looking at height and weight, a pair of individuals with one individual being 6 feet tall and 180 pounds and the other individual being 5.8 feet tall and 160 pounds would be a concordant pair. On the other hand, a pair of individuals with one individual being 6 feet tall and 150 pounds and the other being 5.8 feet tall and 160 pounds would be a discordant pair. Finally, a pair of individuals with one individual being 6 feet tall and 100 pounds and one being 6 feet tall and 200 pounds would be a tied pair (as long as they are tied in at least one variable they are considered a tied pair). For a data set of *n* observations, there are $n(n-1)/2$ total pairs. For hypothesis testing with both Kendall's tau-b and Goodman and Kruskal's gamma, we are testing the null hypothesis that there is no correlation between the two variables against the alternative hypothesis that there is a correlation.
  
  <br>
  
  Goodman and Kruskal's gamma is the simpler of the two rank correlation statistics that we examined, but potentially less informative. Gamma only takes into account the number of discordant and concordant pairs. Thus, a data set with 100 concordant, 10 discordant, and 5 tied pairs would have the same gamma statistic as a data set with 100 concordant, 10 discordant, and 10000 tied pairs. Gamma is calculated by taking the difference between the number of concordant and discordant pairs and dividing it by the number of concordant and discordant pairs combined.
  
  $$\widehat{\gamma}  = \frac{C - D}{C + D}$$

* *C* is the number of concordant pairs
* *D* is the number of discordant pairs

  An approximate standard error for gamma for large samples is given by:
  
  $$\widehat{SE}[\gamma] = \sqrt{\frac{n(1 - \widehat{\gamma}^2)}{C + D}}$$
  
  We used this standard error to calculate approximate confidence intervals for gamma.
  
  For large samples, gamma divided by its standard error has an approximately standard normal distribution. This is what we used as our test-statistic for gamma, in order to test if there is significant correlation between the two variables of interest (in this case, insurance status and whether or not an individual has a personal doctor).
  
  $$\frac{\widehat{\gamma}}{\widehat{SE}} \sim AN(0,1)$$

 In order to calculate the concordance statistics (both gamma and tau), their confidence intervals, their test-statistics, and the associated p-values we created an R function called "Rank.Correlation()". This function is shown below, after the description of Kendall's tau-b.
 
 <br>
 
 Kendall's tau-b is a little more complicated to compute and interpret than gamma, but it takes ties into account. Our data contained many ties so the tau-b statistic is perhaps a more useful measure for our situation than gamma. Kendall's tau-b looks at the number of discordant pairs and concordant pairs relative to the total number of pairs (including ties) in the data set. The formula is thus more complex.
 
 $$\widehat{\tau_b} = \frac{C - D} {\sqrt{(n_0 - n_1)(n_0 - n_2)}}$$

* $n_0$ is the total number of pairs in the data set: $n(n-1)/2$
* $n_1$ is the sum of the total number of *tied* pairs at each level of the first variable: $n_1 = \sum{T_1}$
* $n_2$ is the sum of the total number of *tied* pairs at each level of the second variable: $n_2 = \sum{T_2}$

  An approximate standard error for the tau-b statistic with large samples is:
  
  $$\widehat{SE}[\tau_b] = \sqrt{\frac{2(2n+5)}{9n(n-1)}}$$

  We used this standard error to calculate approximate confidence intervals for tau-b, but we did not use it to calculate our test-statistic. Instead we used a more conservative measure.
  
  The test-statistic we used for hypothesis testing with tau-b is given below. For large samples this test-statistic has an approximately standard normal distribution.
  
   $$\widehat{z_{\tau_b}} = \frac{C - D} {\sqrt{ ( \frac{v_0 - v_t - v_u}{18} )+ v_1 + v_2  }} \sim AN(0,1)$$

* $v_0 = n(n-1)(2n+5)$
* $v_t = \sum_i t_i(t_i - 1)(2t_i + 5)$
    + $t_i$ is the total number of tied *values* (not pairs) for the $i^{th}$ level of the first variable. *Tied values* for each level of the first variable are the same as row totals (if the row variable is considered the first variable) or column totals (if the column variable is considered the first variable).
* $v_u = \sum_j u_j(u_j - 1)(2u_j +5)$ 
    + $u_j$ is the total number of tied *values* (not pairs) for the $j^{th}$ level of the second variable. *Tied values* for each level of the second variable are the same as row totals (if the row variable is considered the second variable) or column totals (if the column variable is considered the second variable).
* $v_1 = \frac{{\sum_i t_i(t_i - 1)}{\sum_j u_j(u_j - 1)}}{2n(n-1)}$
* $v_2 = \frac{{\sum_i t_i(t_i - 1)(t_i - 2)}{\sum_j u_j(u_j - 1)(u_j - 2)}}{9n(n-1)(n-2)}$
  
  To calculate both the gamma and tau-b statistics, their confidence intervals, their test-statistics, and the corresponding p-values we created the R function "Rank.Correlation()" shown below. This function also calculates the tau-a statistic (a Kendall's tau statistic that does not account for ties) and another test-statistic for the tau-b statistic. However, we found the above statistic to be more conservative and thus chose it for our report. Both test-statistics give p-values of approximately zero for all tests performed in this report.
  
```{r, results="asis"}

Rank.Correlation <- function(x, alpha = 0.05){
  # Rank (concordance) correlation statistics for a 2 x c contingency table
  # whose row and column categories are both ordered.
  #
  # Args:
  #   x:     Table of (possibly weighted) counts with exactly two rows. May be
  #          a numeric matrix or a data frame / tibble of numeric columns; it
  #          is coerced to a plain numeric matrix so the result is always
  #          numeric regardless of the input container.
  #   alpha: One minus the confidence level of the intervals
  #          (default 0.05, i.e. 95% intervals).
  #
  # Returns:
  #   A numeric 3 x 7 matrix with rows Tau.a, Tau.b and Gamma and columns:
  #   lower bound, estimate, upper bound, test statistic, two-sided p-value,
  #   alternative test statistic (estimate / large-sample SE) and its
  #   two-sided p-value (both NA for gamma).
  
  x <- unname(as.matrix(x))
  stopifnot(is.numeric(x), nrow(x) == 2, ncol(x) >= 1)
  # Work in doubles so products of large integer counts cannot overflow.
  storage.mode(x) <- "double"
  
  # The hypotheses are two-sided (no correlation vs. some correlation), so
  # both tails of the standard normal are counted.
  two.sided.p <- function(stat) 2*pnorm(abs(stat), lower.tail = FALSE)
  
  z <- qnorm(alpha/2, lower.tail = FALSE)
  cols <- ncol(x)
  row.tot <- rowSums(x)
  col.tot <- colSums(x)
  n <- sum(row.tot)
  np <- n*(n-1)/2                                  # total number of pairs
  SE.LS.approx <- sqrt((2*(2*n+5))/(9*n*(n-1)))    # large-sample SE for tau
  
  # Pairs tied on the row variable / on the column variable
  Tied.r <- sum(row.tot*(row.tot-1)/2)
  Tied.c <- sum(col.tot*(col.tot-1)/2)
  
  # Concordant pairs: row 1 at column i with row 2 at a later column.
  # Discordant pairs: row 2 at column i with row 1 at a later column.
  C.pairs <- 0
  D.pairs <- 0
  for(i in seq_len(cols - 1)){
    later <- (i + 1):cols
    C.pairs <- C.pairs + x[1,i]*sum(x[2,later])
    D.pairs <- D.pairs + x[2,i]*sum(x[1,later])
  }
  
  # Tie-correction sums over the marginal totals of one variable
  tie.terms <- function(tot){
    s <- tot*(tot-1)
    c(v = sum(s*(2*tot+5)), v1 = sum(s), v2 = sum(s*(tot-2)))
  }
  col.terms <- tie.terms(col.tot)
  row.terms <- tie.terms(row.tot)
  
  vt <- col.terms[["v"]]
  vu <- row.terms[["v"]]
  v1 <- col.terms[["v1"]]*row.terms[["v1"]]/(2*n*(n-1))
  v2 <- col.terms[["v2"]]*row.terms[["v2"]]/(9*n*(n-1)*(n-2))
  v0 <- n*(n-1)*(2*n+5)
  
  # Tie-adjusted variance of C - D under the null hypothesis
  v <- (v0 - vt - vu)/18 + v1 + v2
  
  # Kendall's tau-a (ignores ties) and tau-b (adjusts for ties)
  Ktau.a <- (C.pairs - D.pairs) / np
  Ktau.b <- (C.pairs - D.pairs) / sqrt( (np - Tied.r)*(np - Tied.c) )
  
  Z.a <- 3*(C.pairs - D.pairs) / sqrt(n*(n-1)*(2*n+5)/2)
  pva <- two.sided.p(Z.a)
  Z.a.Alt <- Ktau.a/SE.LS.approx
  pva.Alt <- two.sided.p(Z.a.Alt)
  
  Ta.LB <- Ktau.a - z*SE.LS.approx
  Ta.UB <- Ktau.a + z*SE.LS.approx
  
  Z.b <- (C.pairs - D.pairs) / sqrt(v)
  pvb <- two.sided.p(Z.b)
  Z.b.Alt <- Ktau.b/SE.LS.approx
  pvb.Alt <- two.sided.p(Z.b.Alt)
  
  Tb.LB <- Ktau.b - z*SE.LS.approx
  Tb.UB <- Ktau.b + z*SE.LS.approx
  
  # Goodman and Kruskal's gamma with its large-sample standard error
  gamma <- (C.pairs - D.pairs)/(C.pairs + D.pairs)
  SE.gamma <- sqrt(n*(1-gamma^2)/(C.pairs+D.pairs))
  Z.gamma <- gamma/SE.gamma
  pvg <- two.sided.p(Z.gamma)
  
  G.LB <- gamma - z*SE.gamma
  G.UB <- gamma + z*SE.gamma
  
  # Filled column by column: one row per statistic
  out <- matrix(c(Ta.LB, Tb.LB, G.LB, Ktau.a, Ktau.b, gamma, Ta.UB, Tb.UB, G.UB, Z.a, Z.b, Z.gamma, 
                  pva, pvb, pvg, Z.a.Alt, Z.b.Alt, NA, pva.Alt, pvb.Alt, NA),
                nrow = 3, ncol = 7,
                dimnames = list(c("Tau.a", "Tau.b", "Gamma"), 
                                c("Lower.Bound","Estimate", "Upper Bound", "TS", "p.val", "Alt.TS", "p.val")))
  
  out
}

# 2011 personal-doctor table: keep the tau-b and gamma rows and the first
# five columns (bounds, estimate, test statistic, p-value) for display.
roc.pd11 <- Rank.Correlation(ST_PD_t11[, 2:3])[c("Tau.b", "Gamma"), 1:5]

pandoc.table(roc.pd11)

```

<br>

### Absolute Difference of Proportions

  The absolute difference (AD) in proportions is the proportion in one group minus the proportion in the other group. For example, for the personal doctor measure, the absolute difference in proportion is the difference between the proportion of insured individuals with a personal doctor and the proportion of uninsured individuals with a personal doctor.

```{r, echo=F, results="asis"}

# Lay out the first two rows of ST_PD_t11 (columns 2-3) as a labelled
# 2 x 2 contingency table, filled row by row.
x <- matrix(c(ST_PD_t11[1, 2:3], ST_PD_t11[2, 2:3]),
            nrow = 2, ncol = 2, byrow = TRUE)
rownames(x) <- c("Insured", "Uninsured")
colnames(x) <- c("Personal Doctor", "No Personal Doctor")

pandoc.table(x, caption = "2011 contingency table for personal doctor")
```

<br>

$$\widehat{AD}_{pd} = \widehat{p_1} - \widehat{p_2} = \frac{351558.2}{351558.2+ 55356.99} - \frac{36834.62}{36834.62 + 53211.23} \approx 0.864 - 0.409 = 0.455$$
<br>
  
  The standard error is calculated as if for a difference of two proportions test.

$$ \widehat{SE}[AD] = \sqrt{ \frac{\widehat{p_1}(1 - \widehat{p_1})}{n_1} + \frac{\widehat{p_2}(1 - \widehat{p_2})}{n_2}  } $$
$$ \widehat{SE}[AD_{pd}] \approx \sqrt{ \frac{0.864(1 - 0.864)}{351558.2+ 55356.99} + \frac{0.409(1 - 0.409)}{36834.62 + 53211.23}  } = \sqrt{ \frac{0.864(.136)}{406915.19} + \frac{0.409(.591)}{90045.85}  } \approx 1.72 * 10^{-3} $$

<br>
  
  In order to calculate the 95% confidence intervals we add and subtract $1.96*\widehat{SE}$ to the estimated absolute difference:
  $$ 95\% \space \text{CI}[AD_{pd}] = \widehat{AD}_{pd} \pm 1.96*\widehat{SE}[AD_{pd}] \approx 0.455 \pm 1.96(1.72*10^{-3}) \approx 0.455 \pm 0.00337 \approx (0.451, 0.458) $$

<br>

### Relative Difference of Proportions

  The relative difference of proportions is the ratio of one proportion to another. For example, for the personal doctor measure, the relative difference of proportions is the ratio of the proportion of insured individuals that have a doctor to the proportion of uninsured individuals that have a doctor. So the relative difference for the personal doctor measure in 2011 is 2.11, meaning insured individuals are estimated to be about 2.11 times as likely to have a personal doctor as uninsured individuals.
  
```{r, echo=F, results="asis"}

# Lay out the first two rows of ST_PD_t11 (columns 2-3) as a labelled
# 2 x 2 contingency table, filled row by row.
x <- matrix(c(ST_PD_t11[1, 2:3], ST_PD_t11[2, 2:3]),
            nrow = 2, ncol = 2, byrow = TRUE)
rownames(x) <- c("Insured", "Uninsured")
colnames(x) <- c("Personal Doctor", "No Personal Doctor")

pandoc.table(x, caption = "2011 contingency table for personal doctor")
```

<br>

$$\widehat{RD}_{pd} = \frac{\widehat{p_1}}{\widehat{p_2}} \approx \frac{.864}{.409} \approx 2.11$$

<br>

The confidence interval for relative difference is calculated as follows.
$$ (1- \alpha)\% \space \text{CI}[RD] = \widehat{RD}*e^{\pm z_{\alpha / 2}*\sqrt{v}}  \space  \space \space \space  \space \space \text{where:}  \space \space v = \frac{1-\widehat{p_1}}{n_{11}} + \frac{1-\widehat{p_2}}{n_{12}}  $$

<br>

So the 95% confidence interval for the relative difference between the insured and uninsured for the personal doctor measure for 2011 is:

$$ \sqrt{v} = \sqrt{ \frac{1-\widehat{p_1}}{n_{11}} + \frac{1-\widehat{p_2}}{n_{12}} } \approx \sqrt{ \frac{.136}{351558.2 } + \frac{.591}{36834.62} } \approx 4.053*10^{-3} $$
$$ 95\% \space \text{CI}[RD_{pd}] = 2.11*e^{\pm 1.96*0.004053} \approx (2.09, 2.13)$$
<br>
<br>