diff --git a/DESCRIPTION b/DESCRIPTION
index 7967f15..cb8ba1e 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: mb11CalculusPilot
 Title: Research compendium
-Version: 0.0.3
+Version: 0.0.0.9000
 Authors@R:
     person("First", "Last", , "first.last@example.com", role = c("aut", "cre"))
 Description: Research compendium for 'Multiproxy analysis exploring patterns of diet and disease in dental calculus and skeletal remains from a 19th century Dutch population'.
diff --git a/inst/paper/_results.qmd b/inst/paper/_results.qmd
index 49759ad..48a962c 100644
--- a/inst/paper/_results.qmd
+++ b/inst/paper/_results.qmd
@@ -7,7 +7,6 @@ For a full list of targeted compounds, see Supplementary Material.
 
 ```{r}
 #| label: detection-tbl-setup
-
 uhplc_calculus_detect <- uhplc_calculus_long %>%
 #uhplc_calculus_detect <- uhplc_calculus_demography %>%
   #remove_missing() %>%
@@ -115,13 +114,14 @@ quantity of compound extracted from the calculus ([@fig-detection-preservation]B
 ```{r}
 #| label: fig-detection-preservation
 #| fig-cap: "(A) Violin plot with overlaid box plots depicting the distribution of extracted quantities of each compound from batch 2 separated by state of preservation of the skeleton. (B) Extracted quantity (ng) of compound plotted against weights of the calculus samples from batch 2."
+#| fig-width: 5
+#| fig-height: 7
 
 preservation_plot <- uhplc_calculus_long %>%
   filter(
     !is.na(preservation),
     quant > 0,
     batch == "batch2",
-    #!compound %in% c("cbd", "cbn", "cocaine", "thc", "thca-a", "thcva"), # remove compounds not detected
     id != "MB329" # unusually high nicotine and cotinine quantity in batch 1
     ) %>% 
   ggplot(aes(x = preservation, y = quant)) +
@@ -140,10 +140,8 @@ preservation_plot <- uhplc_calculus_long %>%
 
 weight_quant_plot <- uhplc_calculus_long %>%
   filter(
-    #!is.na(preservation),
     batch == "batch2",
     quant > 0,
-    #!compound %in% c("cbd", "cbn", "cocaine", "thc", "thca-a", "thcva"), # remove compounds not detected
     id != "MB329" # unusually high nicotine and cotinine quantity in batch 1
     ) %>% 
   ggplot(aes(x = weight, y = quant)) +
@@ -153,28 +151,6 @@ weight_quant_plot <- uhplc_calculus_long %>%
     theme(axis.title.y = element_blank()) +
     labs(y = "Quantity (ng)", x = "Weight (mg)")
 
-# weight_quant_plot <- uhplc_data_comb %>%
-#   select(sample, contains("batch2")) %>% 
-#   select(sample, batch2_weight, contains("calc")) %>% 
-#   pivot_longer(
-#     -c(sample, batch2_weight),
-#     names_to = c("compound", "batch"),
-#     names_pattern = "(.*)_(.*)",
-#     values_to = c("conc")
-#   ) %>% 
-#   mutate(compound = str_remove(compound, "_calc")) %>%
-#   #remove_missing() %>% 
-#   filter(
-#     conc > 0,
-#     id != "MB329" # unusually high nicotine and cotinine quantity in batch 1
-#     ) %>% 
-#   ggplot(aes(x = batch2_weight, y = conc)) +
-#     geom_point(col = viridisLite::viridis(1)) +
-#     facet_wrap(~ compound, scales = "free_y", labeller = labeller(compound = compound_names), dir = "v") +
-#     theme_bw() +
-#     theme(axis.title.y = element_blank()) +
-#     labs(y = "Quantity (ng)", x = "Weight (mg)")
-
 preservation_plot + weight_quant_plot + plot_annotation(tag_levels = "A")
 ```
 
@@ -197,7 +173,6 @@ tobacco_accuracy <- tobacco %>%
   group_by(sample, id, .drop = F) %>% 
   summarise(
     detection = sum(detection),
-    #compound = compound
     ) %>% 
   left_join(select(demography, id, pipe_notch, preservation, age), by = "id") %>% 
   mutate(
@@ -266,8 +241,7 @@ and may have been contaminated.
 corr_tib <- conc_cor %>% 
   as_tibble(rownames = "var") %>% 
   filter(var != "age",
-         var != "periodont_status") #%>% 
-  #mutate(across(everything(), signif, 3))
+         var != "periodont_status") 
 
 corr_tib_long <- corr_tib %>% 
   pivot_longer(
@@ -275,14 +249,13 @@ corr_tib_long <- corr_tib %>%
     names_to = "name",
     values_to = "corr"
   )
-# still need to remove everything under the diagonal (colour white?)
-#corr_tib[corr_tib == 1] <- kableExtra::cell_spec(corr_tib[corr_tib == 1], color = "transparent")
+
 corr_tib_out <- corr_tib %>%
   column_to_rownames("var") %>%
   as.matrix() %>% 
   signif(digits = 2)
 cols <- 0
-for(i in 7:nrow(corr_tib)){ # terrible solution for formatting table
+for(i in 7:nrow(corr_tib)){ # remove repeated values
   cols <- cols + 1 
   corr_tib_out[i,1:cols] <- ""
 } 
diff --git a/inst/paper/paper.qmd b/inst/paper/paper.qmd
index 0824c09..0a77301 100644
--- a/inst/paper/paper.qmd
+++ b/inst/paper/paper.qmd
@@ -5,13 +5,13 @@
 
 library(here)
 library(readr)
+devtools::load_all()
 
 # upload data
 metadata <- read_tsv(here("inst/data/raw_data/metadata.tsv"))
 demography <- read_csv(here("inst/data/raw_data/demography.csv"))
 lloq <- read_tsv(here("inst/data/raw_data/lloq.tsv"))
 uhplc_data_comb <- read_csv(here("inst/data/derived_data/uhplc-data_combined.csv"))
-#uhplc_calculus <- read_csv(here("inst/data/derived_data/uhplc-calculus_cleaned.csv"))
 dental_inv <- read_csv(here("inst/data/raw_data/dental-inv.csv"))
 caries <- read_csv(here("inst/data/raw_data/caries.csv"))
 periodont <- read_csv(here("inst/data/raw_data/periodontitis.csv"))
diff --git a/inst/scripts/01-data_prep.R b/inst/scripts/01-data_prep.R
index 6256d37..c67c14c 100644
--- a/inst/scripts/01-data_prep.R
+++ b/inst/scripts/01-data_prep.R
@@ -10,20 +10,12 @@ library(purrr)
 # Upload data -------------------------------------------------------------
 
 # See README for details
-metadata <- read_tsv("inst/data/raw_data/metadata.tsv") # ids need to be changed to match key
+metadata <- read_tsv("inst/data/raw_data/metadata.tsv")
 lloq <- read_tsv("inst/data/raw_data/lloq.tsv")
 uhplc_data_raw <- read_csv("inst/data/raw_data/uhplc-results.csv")
 uhplc_data_raw2 <- read_csv("inst/data/raw_data/uhplc-results_batch2.csv")
-dental_inv <- read_csv("inst/data/raw_data/dental-inv.csv")
 sinusitis <- read_csv("inst/data/raw_data/sinusitis.csv")
 path_cond <- read_csv("inst/data/raw_data/path-conditions.csv")
-caries <- read_csv("inst/data/raw_data/caries.csv")
-periodont <- read_csv("inst/data/raw_data/periodontitis.csv")
-periap <- read_csv("inst/data/raw_data/periapical.csv")
-calculus <- read_csv("inst/data/raw_data/calculus.csv")
-calculus_full <- read_csv("inst/data/raw_data/calculus_full.csv")
-demography <- read_csv("inst/data/raw_data/demography.csv")
-
 
 # CMS = chronic maxillary sinusitis
 # IPR = periosteal reaction on visceral surface of ribs
@@ -77,38 +69,6 @@ uhplc_data_comb <- uhplc_data_batch1 %>%
   #select(sample, contains("calc"))
 
 
-# Compounds presence/absence
-
-# uhplc_calculus_bin <- uhplc_calculus %>%
-#   mutate(
-#     across(
-#       where(is.numeric),
-#       function(x) if_else(!is.na(x), T, F) # convert concentration to boolean
-#       )
-#     ) %>%
-#   pivot_longer( # convert to long form to include only compounds present or absent in both batches
-#     cols = -sample,
-#     names_to = c("compound", "batch"),
-#     names_pattern = "(.*)_(.*)",
-#     values_to = c("value")
-#     ) %>%
-#     mutate(compound = str_remove(compound, "_calc"), # redundant suffix
-#            value = as.numeric(value)) %>% # convert to 0s and 1s
-#     group_by(sample, compound) %>%
-#     summarise(value = sum(value))
-
-# create data frame with replicated compounds and remove compounds absent in all individuals
-# uhplc_calculus_replicated <- uhplc_calculus_bin %>%
-#   group_by(id,compound) %>%
-#   summarise(presence = sum(presence)) %>%
-#   filter(presence == 0 | presence == 2) %>%
-#   #filter(sum(presence) != 0) #%>% # remove compounds absent in all individuals in sample
-#   mutate(presence = if_else(presence == 2, 1, 0)) %>% # convert replications to presence/absence
-#   pivot_wider(names_from = compound, values_from = presence) %>%
-#   mutate(across(where(is.numeric), replace_na, 0)) %>% # replace NAs with 0 (not ideal solution, may be reconsidered)
-#   left_join(select(metadata, id, sample), by = "id")
-
-
 # Sinusitis data
 
 sinusitis_clean <- sinusitis %>% # convert yes/no to true/false
@@ -126,61 +86,6 @@ path_cond_clean <- path_cond %>%
                           x == "N" ~ FALSE)
                 )
          )
-#
-# # dental data
-#
-# # Caries rate
-#
-# # convert caries data from location to number of caries and caries rate
-#
-# surface <- c("mes", "dis", "occ", "buc", "lin", "root", "crown") # here occ = incisal
-#
-# caries_rate <- caries %>%
-#   pivot_longer(t11:t48, names_to = "tooth",
-#                values_to = "caries_score") %>%
-#   na.omit() %>%
-#   separate_rows(caries_score, sep = ";") %>% # one lesion per row
-#   mutate(
-#     caries_count = if_else(
-#     caries_score == "none", 0L, 1L
-#     )
-#   ) %>%
-#   group_by(id) %>%
-#   summarise(
-#     n_teeth = n(),
-#     count = sum(caries_count, na.rm = T), # crude caries rate
-#     caries_rate = count / n_teeth
-#   ) %>%
-#   select(id, caries_rate)
-#
-# # Median periodontal status
-#
-# periodont_status <- periodont %>%
-#   mutate(periodont_status = apply(.[,-1], MARGIN = 1, FUN = median, na.rm = T)) %>%
-#   select(id, periodont_status)
-#
-# # number of periapical lesions
-#
-# periap_num <- periap %>%
-#   mutate(across(-id, ~ if_else(.x == "none", 0, 1))) %>%
-#   mutate(periap_num = apply(.[,-1], MARGIN = 1, FUN = sum, na.rm = T)) %>%
-#   select(id, periap_num)
-#
-# # calculus index
-#
-# calc_index <- calculus_full %>%
-#   dental_longer(id) %>%
-#   calculus_index(simple = T) %>%
-#   select(!c(n_surf, score_sum))
-#
-# # Prepare data for EFA ----------------------------------------------------
-#
-# data_list <- list(path_cond_clean, sinusitis_clean, caries_rate, periodont_status, periap_num, uhplc_calculus_replicated, calc_index)
-#
-# efa_data <- data_list %>%
-#   reduce(inner_join, by = "id") %>%
-#   mutate(across(where(is.logical), as.numeric)) %>%
-#   filter(complete.cases(.))
 
 
 # Export data -------------------------------------------------------------
diff --git a/inst/scripts/02_data-explore.R b/inst/scripts/02_data-explore.R
deleted file mode 100644
index b3f5f27..0000000
--- a/inst/scripts/02_data-explore.R
+++ /dev/null
@@ -1,69 +0,0 @@
-# Soft-deprecated
-# should this all be done in the report?
-library(readr)
-library(dplyr)
-library(tidyr)
-library(purrr)
-
-# Upload data -------------------------------------------------------------
-
-metadata <- read_tsv("inst/data/raw_data/metadata.tsv")
-lloq <- read_tsv("inst/data/raw_data/lloq.tsv")
-uhplc_calculus <- read_tsv("inst/data/derived_data/uhplc-data_cleaned.csv")
-dental_inv <- read_csv("inst/data/raw_data/dental-inv.csv")
-caries <- read_csv("inst/data/raw_data/caries.csv")
-periodont <- read_csv("inst/data/raw_data/periodontitis.csv")
-periap <- read_csv("inst/data/raw_data/periapical.csv")
-calculus <- read_csv("inst/data/raw_data/calculus.csv")
-demography <- read_csv("inst/data/raw_data/demography.csv")
-sinusitis <- read_csv("inst/data/derived_data/sinusitis_cleaned.csv")
-
-
-# Convert to long ---------------------------------------------------------
-
-dental_inv_long <- dental_inv %>% # Inventory
-  right_join(demography) %>%
-  select(!c(occupation, own_grave, tax, pipe_notch)) %>%
-  pivot_longer(t11:t48, names_to = "tooth",
-               values_to = "status") %>%
-  mutate(status = case_when(status == "dna" ~ "m", # teeth missing due to DNA sampling
-                            TRUE ~ status))
-
-teeth_list <- list(caries, periodont, periap, calculus)
-
-test<- lapply(
-  teeth_list,
-  pivot_longer,
-  cols = t11:t48,
-  names_to = "tooth",
-  values_to = "score"
-)
-
-caries_long <- caries %>% # Caries
-  pivot_longer(t11:t48, names_to = "tooth",
-               values_to = "caries_score")
-
-periodont_long <- periodont %>% # Periodontitis
-  pivot_longer(cols = t11:t48,
-               names_to = "tooth",
-               values_to = "periodont_score")
-
-periap_long <- periap %>% # Periapical lesions
-  pivot_longer(cols = t11:t48,
-               names_to = "tooth",
-               values_to = "periap_score")
-
-calculus_long <- calculus %>% # Calculus
-  pivot_longer(cols = t11:t48,
-               names_to = "tooth",
-               values_to = "calculus_score")
-
-# prepare to merge data frames
-teeth_list <- list(dental_inv_long, caries_long, periodont_long,
-                   periap_long, calculus_long)
-
-# Long dental frame
-
-dental_long <- teeth_list %>%
-  reduce(inner_join, by = c("id", "tooth")) %>%
-  tooth_position()
diff --git a/inst/scripts/03_data-analyse.R b/inst/scripts/03_data-analyse.R
deleted file mode 100644
index d1828d4..0000000
--- a/inst/scripts/03_data-analyse.R
+++ /dev/null
@@ -1,104 +0,0 @@
-# data analysis
-library(psych)
-library(dplyr)
-library(readr)
-library(corrplot)
-
-# Upload data -------------------------------------------------------------
-
-#efa_data <- read_csv("inst/data/derived_data/efa-analysis_data.csv")
-
-# combine nicotine and cotinine to tobacco
-efa_comb <- efa_data %>%
-  mutate(tobacco = case_when(nicotine == 1 | cotinine == 1 ~ 1,
-                             TRUE ~ 0)) %>%
-  select(!c(nicotine, cotinine))
-
-# Exploratory factor analysis (psych package?)
-
-  # tetrachoric correlation for dichotomous variables
-
-
-# Missing values
-
-
-
-# selection of variables (remove very low and high frequencies)
-
-# dichotomise caries and calculus ratios for polychoric correlation
-  # caries ratios falling below the 25th percentile are considered absent
-  # discretisation of caries and calculus
-caries_discrete <- quantile(caries_rate$caries_rate)
-calculus_discrete <- quantile(calc_index$calc_index)
-
-caries_bin <- caries_rate %>%
-  mutate(caries_rate = case_when(caries_rate > quantile(caries_rate)[2] ~ 1,
-                                 TRUE ~ 0
-  ))
-
-
-efa_filter <- efa_comb %>%
-  select(!c(OD)) %>% # vertebral osteophytosis highly correlated with OA
-  summarise(across(where(is.numeric), function(x) sum(x) / length(x))) %>%
-  select(which(colSums(.) > 0.1))
-
-efa_select <- efa_comb %>%
-  select(names(efa_filter)) %>%
-  select(!c(periap_num)) %>%
-  filter(complete.cases(.)) #%>%
-  as.matrix()
-
-efa_discrete <- efa_select %>%
-  mutate(
-    calc_index = case_when(
-      calc_index <= calculus_discrete[1] ~ 0,
-      calc_index >= calculus_discrete[1] & calc_index < calculus_discrete[2] ~ 1,
-      calc_index >= calculus_discrete[2] & calc_index < calculus_discrete[3] ~ 2,
-      calc_index >= calculus_discrete[3] & calc_index < calculus_discrete[4] ~ 3,
-      calc_index >= calculus_discrete[4] & calc_index <= calculus_discrete[5] ~ 4,
-      ),
-    caries_rate = case_when(
-      caries_rate <= caries_discrete[1] ~ 0,
-      caries_rate >= caries_discrete[1] & caries_rate < caries_discrete[2] ~ 1,
-      caries_rate >= caries_discrete[2] & caries_rate < caries_discrete[3] ~ 2,
-      caries_rate >= caries_discrete[3] & caries_rate < caries_discrete[4] ~ 3,
-      caries_rate >= caries_discrete[4] & caries_rate <= caries_discrete[5] ~ 4,
-    )
-  ) %>%
-  as.matrix()
-
-
-
-KMO(cor(efa_discrete)) # lower than suggested 0.6, maybe factor analysis not good
-cortest.bartlett(efa_discrete) # small value, factor analysis okay
-
-
-
-# Polychoric correlation
-
-cor(efa_select)[,c("caries_rate","calc_index")] # for comparing caries and calculus with compounds before dichotomisation
-
-polycorr <- polychoric(efa_discrete)
-
-corrplot(polycorr$rho, method = "number", type = "lower", diag = F)
-
-# Factor analysis ---------------------------------------------------------
-
-efa_init <- fa(efa_discrete, nfactors = ncol(efa_discrete), rotate = "none")
-eigenval <- efa_init$e.values
-data.frame(
-  n_fact = as.factor(1:length(eigenval)),
-  eigenval
-           ) %>%
-  ggplot(aes(x = n_fact, y = eigenval, group = 1)) +
-    geom_line() +
-    geom_point()
-
-efa_analysis <- fa(efa_select)
-fa.diagram(efa_analysis)
-
-
-
-# Multiple correspondence analysis ----------------------------------------
-
-
diff --git a/inst/scripts/new-explore.R b/inst/scripts/new-explore.R
deleted file mode 100644
index fc595aa..0000000
--- a/inst/scripts/new-explore.R
+++ /dev/null
@@ -1,118 +0,0 @@
-library(tidyverse)
-library(here)
-source(here("scripts/dental-plot.R"))
-
-dental_long <- readr::read_csv(here("data/derived_data/dental-full_long.csv"))
-
-
-
-# Data wrangling ----------------------------------------------------------
-
-# Caries rate
-
-# convert caries data from location to number of caries and caries rate
-# number of lesions per tooth observed
-# separate anterior and posterior teeth
-surface <- c("mes", "dis", "occ", "buc", "lin", "root", "crown") # here occ = incisal
-
-caries_rate <- dental_long %>%
-  filter(status == "p") %>%
-  separate_rows(caries_score, sep = ";") %>% # one lesion per row
-  mutate(caries_count = if_else(
-    caries_score == "none", 0L, 1L
-    )
-  ) %>%
-  group_by(id, tooth_type, position) %>%
-  summarise(
-    n_teeth = n(),
-         count = sum(caries_count, na.rm = T),
-         rate = count / n_teeth
-            )
-
-
-  # mutate(caries_count = if_else(caries_score == "none", 0L, 
-  #                               stringi::stri_count_words(caries_score)
-  #                               )
-  #       )
-  
-#caries_rate <- 
-# caries_long %>%
-#   group_by(id, position) %>%
-#   filter(status == "p") %>%
-#   summarise(n_teeth = n(),
-#             count = sum(caries_count, na.rm = T),
-#             rate = count / n_teeth
-#             ) %>%
-#   ungroup()
-#filter(status == "p") %>%
-#  summarise(caries_count = sum(caries_count, na.rm = T)) %>% #view
-#  group_by(id) %>%
-#  summarise(n_teeth = n(),
-#            caries_rate = sum(caries_count, na.rm = T) / n_teeth)
-
-# caries_rate_type <- dental_long %>%
-#   group_by(tooth_type) %>%
-#   summarise(n_teeth = sum(!is.na(caries_count)),
-#             n_caries = sum(caries_count, na.rm = T),
-#             caries_rate = n_caries / n_teeth) # shouldn't be any NA??
-
-# CARIES RATE CALCULATION
-
-# Caries longform data with row for each surface of each tooth?
-
-# convert periodontitis and calculus to mean/median score?  
-
-
-
-#full_data_long
-
-# compare replicated samples across batch 1 and 2
-
-caffeine_rep <- uhplc_data_comb %>%
-  select(sample, contains("caffeine")) %>%
-  select(sample, contains("calc")) %>%
-  summarise(caffeine = sum(caffeine_calc_batch1, caffeine_calc_batch2)) %>%
-  mutate(caffeine = if_else(!is.na(caffeine), "yes", "no"))
-#  mutate(caffeine = rowSums()) %>%
-#  select(sample, caffeine)
-
-replicates <- data.frame("sample" = uhplc_data_batch2$sample)
-
-tobacco <- compounds %>%
-  #na.omit() %>%
-  filter(compound %in% c("nicotine", "cotinine")) %>%
-  inner_join(demography, by = "id")
-# Summary data frames -----------------------------------------------------
-
-# Central tendency of dental lesions per individual
-
-
-
-# Plots -------------------------------------------------------------------
-
-# types of lesions
-dental_long %>%
-  filter(!is.na(caries_score),
-         caries_score != "none",
-         caries_score != "root?") %>%
-  separate_rows(caries_score, sep = ";") %>%
-  dental_plot(fill = caries_score)
-
-# overall distribution of dental lesions in sample
-
-# dental_long %>%
-#   dental_plot(caries_count)
-# 
-# dental_long %>%
-#   dental_plot(periodont_score)
-# 
-# dental_long %>%
-#   dental_plot(periap_score)
-
-## this plot is essentially useless since I targeted individuals with calculus
-# on lower incisors
-# dental_long %>%
-#   dental_plot(calculus_score) 
-
-# distribution of 
-
diff --git a/inst/scripts/new-prep.R b/inst/scripts/new-prep.R
deleted file mode 100644
index 8df4e75..0000000
--- a/inst/scripts/new-prep.R
+++ /dev/null
@@ -1,185 +0,0 @@
-library(tidyverse)
-library(here)
-
-# Upload data -------------------------------------------------------------
-
-# See README for details
-lloq <- readr::read_tsv(here("data/lloq.tsv"))
-uhplc_data_raw <- readr::read_csv(here("data/raw/anon/uhplc-results.csv"))
-uhplc_data_raw2 <- readr::read_csv(here("data/raw/anon/uhplc-results_batch2.csv"))
-dental_inv <- readr::read_csv(here("data/raw/anon/dental-inv.csv"))
-caries <- readr::read_csv(here("data/raw/anon/caries.csv"))
-periodont <- readr::read_csv(here("data/raw/anon/periodontitis.csv"))
-periap <- readr::read_csv(here("data/raw/anon/periapical.csv"))
-calculus <- readr::read_csv(here("data/raw/anon/calculus.csv"))
-demography <- readr::read_csv(here("data/raw/anon/demography.csv"))
-sinusitis <- readr::read_csv(here("data/raw/anon/sinusitis.csv"), na = "UNOBS")
-# CMS = chronic maxillary sinusitis
-# IPR = periosteal reaction on visceral surface of ribs 
-  # present only if active (pulmonary infection)
-
-# Helper objects ----------------------------------------------------------
-
-# vectors to categorise teeth by position and type (FDI notation)
-maxilla <- c(paste0("t", 11:18), paste0("t", 21:28))
-mandible <- c(paste0("t", 31:38), paste0("t", 41:48)) 
-left <- c(paste0("t", 21:28), paste0("t", 31:38))
-posterior <- c(paste0("t", 14:18), paste0("t", 24:28), 
-               paste0("t", 34:38), paste0("t", 44:48))
-molar <- c(paste0("t", 16:18), paste0("t", 26:28), 
-           paste0("t", 36:38), paste0("t", 46:48))
-incisor <- c(paste0("t", 11:12), paste0("t", 21:22), 
-             paste0("t", 31:32), paste0("t", 41:42))
-premolar <- c(paste0("t", 14:15), paste0("t", 24:25), 
-              paste0("t", 34:35), paste0("t", 44:45))
-canine <- c(paste0("t", 13), paste0("t", 23), 
-            paste0("t", 33), paste0("t", 43))
-
-# Data cleaning -----------------------------------------------------------
-
-# UHPLC-MS/MS results
-
-uhplc_data <- uhplc_data_raw %>%
-  filter(sample != "16b") %>% # sample 16b removed (test sample that differs from 16 in sampling location)
-  group_by(sample) %>%
-  mutate(across(weight:cocaine_calculus, sum)) %>% # combine 12.1 and 12.2 (split samples from same sampling location)
-  filter(sample != "12.2") %>%
-  rename(theophyl_calc = thyephyl_calc) #%>%
-
-uhplc_data$sample[uhplc_data$sample == "12.1"] <- "12" # change value to 12
-uhplc_data$sample <- as.factor(uhplc_data$sample)
-
-write_csv(uhplc_data, here("data/derived_data/uhplc-data_cleaned.csv"))
-# write cleaned data to data/derived_data
-
-id_sample_key <- uhplc_data %>%
-  select(id, sample)
-
-uhplc_data_batch2 <- uhplc_data_raw2 %>%
-  rename(theophyl_calc = theophylline_calc) %>%
-  mutate(sample = as_factor(sample))
-
-write_csv(uhplc_data_batch2, here("data/derived_data/uhplc-data-batch2_cleaned.csv"))
-# write cleaned data to data/derived_data
-
-# Combine batch 1 and 2
-
-uhplc_data_comb <- uhplc_data %>%
-  full_join(uhplc_data_batch2, by = "sample", suffix = c("_batch1", "_batch2"))
-names(uhplc_data_comb)
-
-uhplc_long_batch1 <- uhplc_data %>%
-  pivot_longer(cols = caffeine_wash1:cocaine_calculus, 
-               names_to = c("compound", "wash"), names_sep = "_", 
-               values_to = "quantity") %>% # combine wash1-3 into single column
-  select(!c(weight, weight_wash1, weight_wash2, weight_wash3)) %>%
-  mutate(batch = 1)
-
-uhplc_long_batch2 <- uhplc_data_batch2 %>%
-  semi_join(id_sample_key) %>%
-  pivot_longer(cols = caffeine_wash1:cocaine_calc, 
-               names_to = c("compound", "wash"), names_sep = "_", 
-               values_to = "quantity") %>% # combine wash1-3 into single column
-  select(!c(weight, weight_wash1, weight_wash2, weight_wash3)) %>%
-  mutate(batch = 2)
-
-uhplc_long <- uhplc_long_batch1 %>%
-  bind_rows(uhplc_long_batch2)
-
-## NEED TO CALCULATE CONCENTRATION BEFORE COMBINING (although concentration is not really relevant) ##
-
-compounds_batch1 <- uhplc_data %>%
-  select(!contains(c("wash", "weight"))) %>%
-  pivot_longer(cols = where(is.numeric), 
-               names_to = "compound", values_to = "quant") %>%
-  mutate(batch = 1) %>%
-  mutate(compound = str_replace(compound, "_calc", ""))
-
-compounds_batch2 <- uhplc_data_batch2 %>%
-  select(!contains(c("wash", "weight"))) %>%
-  pivot_longer(cols = caffeine_calc:cocaine_calc, 
-               names_to = "compound", values_to = "quant") %>%
-  mutate(batch = 2) %>%
-  mutate(compound = str_replace(compound, "_calc", ""))
-
-# combined data frame of compound detection across both batches
-compounds <- compounds_batch2 %>%
-  left_join(id_sample_key) %>%
-  bind_rows(compounds_batch1) %>%
-  mutate(detection = if_else(is.na(quant), FALSE, TRUE)) # presence/absence variable
-
-
-# Calculate concentration (ng / mg) of each compound in the calculus
-
-uhplc_data_conc <- uhplc_data %>%
-  select(!contains(c("_wash1", "_wash2"))) %>%
-  mutate(across(contains("_calc"), ~ .x / weight_wash3)) %>%
-  select(!contains("_wash3"))
-
-sinusitis <- sinusitis %>%
-  mutate(across(CMS:IPR, 
-                function(x) case_when(x == "YES" ~ TRUE,
-                                      x == "NO" ~ FALSE)))
-
-# Dental data -------------------------------------------------------------
-
-# convert to long form
-
-dental_inv_long <- dental_inv %>% # Inventory
-  right_join(demography) %>%
-  select(!c(occupation, own_grave, tax, pipe_notch)) %>%
-  pivot_longer(t11:t48, names_to = "tooth", 
-               values_to = "status") %>%
-  mutate(status = case_when(status == "dna" ~ "m", # teeth missing due to DNA sampling
-                            TRUE ~ status))
-
-caries_long <- caries %>% # Caries
-  pivot_longer(t11:t48, names_to = "tooth", 
-               values_to = "caries_score")
-
-periodont_long <- periodont %>% # Periodontitis
-  pivot_longer(cols = t11:t48,
-               names_to = "tooth", 
-               values_to = "periodont_score")
-
-periap_long <- periap %>% # Periapical lesions
-  pivot_longer(cols = t11:t48,
-               names_to = "tooth", 
-               values_to = "periap_score")
-
-calculus_long <- calculus %>% # Calculus
-  pivot_longer(cols = t11:t48,
-               names_to = "tooth", 
-               values_to = "calculus_score")
-
-# Combined data -----------------------------------------------------------
-
-# prepare to merge data frames
-teeth_list <- list(dental_inv_long, caries_long, periodont_long, 
-                   periap_long, calculus_long)
-
-# Long dental frame
-
-dental_long <- teeth_list %>%
-  reduce(inner_join, by = c("id", "tooth")) %>%
-  mutate(region = if_else(tooth %in% maxilla, "maxilla", "mandible"),
-         side = if_else(tooth %in% left, "left", "right"), 
-         position = if_else(tooth %in% posterior, "posterior", "anterior"),
-         tooth_type = case_when(tooth %in% incisor ~ "incisor",
-                                tooth %in% canine ~ "canine",
-                                tooth %in% premolar ~ "premolar",
-                                tooth %in% molar ~ "molar"),
-         .before = status) %>%
-  mutate(comb_sex = case_when(sex == "pm" ~ "m",
-                         sex == "pf" ~ "f",
-                         TRUE ~ sex))
-
-write_csv(dental_long, here("data/derived_data/dental-full_long.csv"))
-
-# data frame with one row per individual
-  # one column per dental disease
-  # one column for presence/absence of compound
-
-# all_data_wide <- caries_rate %>%
-#   left_join(aml_rate)
-  
diff --git a/inst/scripts/setup-qmd.R b/inst/scripts/setup-qmd.R
index 1691ed9..0ae6b45 100644
--- a/inst/scripts/setup-qmd.R
+++ b/inst/scripts/setup-qmd.R
@@ -1,5 +1,3 @@
-library(here)
-library(readr)
 library(dplyr)
 library(tidyr)
 library(tibble)
@@ -10,7 +8,6 @@ library(glue)
 library(purrr)
 library(psych)
 library(patchwork)
-devtools::load_all()
 
 try(generate_bib())
 
@@ -71,45 +68,6 @@ uhplc_calculus_long <- uhplc_data_long %>%
   left_join(demography, by = "id") %>%
   mutate(preservation = factor(preservation, levels = c("fair", "good", "excellent")))
 
-# uhplc_calculus_demography <- uhplc_calculus_long %>%
-#   left_join(demography) %>%
-#   mutate(preservation = factor(preservation, levels = c("fair", "good", "excellent")))
-
-# presence/absence data frame
-uhplc_calculus_bin <- uhplc_calculus_long %>%
-  mutate(presence = if_else(quant > 0, 1, quant))
-
-# successfully replicated samples only
-uhplc_calculus_replicated <- uhplc_calculus_bin %>%
-  mutate(compound = str_remove(compound, "_calc")) %>%
-  group_by(id, sample, compound) %>% # combine batches
-  summarise(presence = sum(presence)) #%>%
-  # filter(presence == 0 | presence == 2) %>%
-  # group_by(compound) %>%
-  # mutate(presence = if_else(presence == 2, 1, 0)) # convert replications to presence/absence
-
-uhplc_calculus_replicated <- uhplc_calculus_bin %>%
-  filter(id %in% filter(metadata, replicated == TRUE)$id) %>%
-  #right_join(select(filter(metadata, replicated == TRUE)), by = c("id", "sample"))
-  #mutate(compound = str_remove(compound, "_calc")) %>%
-  group_by(id, sample, compound) %>% # combine batches
-  summarise(presence = sum(presence)) %>%
-  filter(presence == 0 | presence == 2) %>% # remove compounds only detected in one batch
-  group_by(compound) %>%
-  mutate(presence = if_else(presence == 0, 0, 1)) %>%  # convert replications to presence/absence
-  ungroup()
-
-uhplc_replicated_wide <- uhplc_calculus_replicated %>%
-  mutate(compound = case_when(compound == "nicotine" ~ "tobacco",
-                              compound == "cotinine" ~ "tobacco",
-                              TRUE ~ compound)) %>%
-  group_by(id, sample, compound) %>%
-  summarise(presence = sum(presence)) %>% # combine nicotine and cotinine
-  #remove_missing() %>%
-  mutate(presence = case_when(presence > 0 ~ TRUE,
-                              TRUE ~ FALSE)) %>%
-  pivot_wider(names_from = "compound", values_from = "presence")
-
 uhplc_conc_wide <- uhplc_calculus_long %>%
   filter(batch == "batch2") %>%
   #left_join(select(metadata, id, batch2_weight)) %>%
@@ -353,29 +311,4 @@ polycorr_tib <- polycorr$rho %>%
   # ) %>%
   # filter(corr != 1)
 
-# objects with correlation statements about variables
-
-# if(nrow(filter(polycorr_tib, strength == "strong")) == 0){
-#   strong_correlations <- "No strong correlations were found"
-# } else {
-#   strong_correlations <- polycorr_tib %>%
-#     filter(strength == "strong") %>%
-#     #distinct(rho, .keep_all = T) %>%
-#     slice(seq(from = 2, to = nrow(.), by = 2)) %>%  # awkward solution to distinct not working
-#     mutate(statement = glue("{var} and {name} ({signif(corr, 3)})")) %>%
-#     .$statement %>%
-#     paste(collapse = ", ")
-# }
-#
-# if(nrow(filter(polycorr_tib, strength == "moderate")) == 0){
-#   moderate_correlations <- "No moderate correlations were found"
-# } else {
-# moderate_correlations <- polycorr_tib %>%
-#   filter(strength == "moderate") %>%
-#   arrange(desc(abs(corr))) %>%
-#   slice(seq(from = 2, to = nrow(.), by = 2)) %>%  # awkward solution to distinct not working
-#   #distinct(rho, .keep_all = TRUE) %>% # not working
-#   mutate(statement = glue("{var} and {name} ({signif(corr, 3)})")) %>%
-#   .$statement %>%
-#   paste(collapse = ", ")
-# }
+