clean up code

bbartholdy · Feb 7, 2023 · 3268c77 · 3268c77
1 parent df4f3bf
commit 3268c77
Show file tree

Hide file tree

Showing 9 changed files with 9 additions and 674 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: mb11CalculusPilot
 Title: Research compendium
-Version: 0.0.3
+Version: 0.0.0.9000
 Authors@R:
     person("First", "Last", , "first.last@example.com", role = c("aut", "cre"))
 Description: Research compendium for 'Multiproxy analysis exploring patterns of diet and disease in dental calculus and skeletal remains from a 19th century Dutch population'.

diff --git a/inst/paper/_results.qmd b/inst/paper/_results.qmd
@@ -7,7 +7,6 @@ For a full list of targeted compounds, see Supplementary Material.
 
 ```{r}
 #| label: detection-tbl-setup
-
 uhplc_calculus_detect <- uhplc_calculus_long %>%
 #uhplc_calculus_detect <- uhplc_calculus_demography %>%
   #remove_missing() %>%
@@ -115,13 +114,14 @@ quantity of compound extracted from the calculus ([@fig-detection-preservation]B
 ```{r}
 #| label: fig-detection-preservation
 #| fig-cap: "(A) Violin plot with overlaid box plots depicting the distribution of extracted quantities of each compound from batch 2 separated by state of preservation of the skeleton. (B) Extracted quantity (ng) of compound plotted against weights of the calculus samples from batch 2."
+#| fig-width: 5
+#| fig-height: 7
 
 preservation_plot <- uhplc_calculus_long %>%
   filter(
     !is.na(preservation),
     quant > 0,
     batch == "batch2",
-    #!compound %in% c("cbd", "cbn", "cocaine", "thc", "thca-a", "thcva"), # remove compounds not detected
     id != "MB329" # unusually high nicotine and cotinine quantity in batch 1
     ) %>% 
   ggplot(aes(x = preservation, y = quant)) +
@@ -140,10 +140,8 @@ preservation_plot <- uhplc_calculus_long %>%
 
 weight_quant_plot <- uhplc_calculus_long %>%
   filter(
-    #!is.na(preservation),
     batch == "batch2",
     quant > 0,
-    #!compound %in% c("cbd", "cbn", "cocaine", "thc", "thca-a", "thcva"), # remove compounds not detected
     id != "MB329" # unusually high nicotine and cotinine quantity in batch 1
     ) %>% 
   ggplot(aes(x = weight, y = quant)) +
@@ -153,28 +151,6 @@ weight_quant_plot <- uhplc_calculus_long %>%
     theme(axis.title.y = element_blank()) +
     labs(y = "Quantity (ng)", x = "Weight (mg)")
 
-# weight_quant_plot <- uhplc_data_comb %>%
-#   select(sample, contains("batch2")) %>% 
-#   select(sample, batch2_weight, contains("calc")) %>% 
-#   pivot_longer(
-#     -c(sample, batch2_weight),
-#     names_to = c("compound", "batch"),
-#     names_pattern = "(.*)_(.*)",
-#     values_to = c("conc")
-#   ) %>% 
-#   mutate(compound = str_remove(compound, "_calc")) %>%
-#   #remove_missing() %>% 
-#   filter(
-#     conc > 0,
-#     id != "MB329" # unusually high nicotine and cotinine quantity in batch 1
-#     ) %>% 
-#   ggplot(aes(x = batch2_weight, y = conc)) +
-#     geom_point(col = viridisLite::viridis(1)) +
-#     facet_wrap(~ compound, scales = "free_y", labeller = labeller(compound = compound_names), dir = "v") +
-#     theme_bw() +
-#     theme(axis.title.y = element_blank()) +
-#     labs(y = "Quantity (ng)", x = "Weight (mg)")
-
 preservation_plot + weight_quant_plot + plot_annotation(tag_levels = "A")
 ```
 
@@ -197,7 +173,6 @@ tobacco_accuracy <- tobacco %>%
   group_by(sample, id, .drop = F) %>% 
   summarise(
     detection = sum(detection),
-    #compound = compound
     ) %>% 
   left_join(select(demography, id, pipe_notch, preservation, age), by = "id") %>% 
   mutate(
@@ -266,23 +241,21 @@ and may have been contaminated.
 corr_tib <- conc_cor %>% 
   as_tibble(rownames = "var") %>% 
   filter(var != "age",
-         var != "periodont_status") #%>% 
-  #mutate(across(everything(), signif, 3))
+         var != "periodont_status") 
 
 corr_tib_long <- corr_tib %>% 
   pivot_longer(
     -var,
     names_to = "name",
     values_to = "corr"
   )
-# still need to remove everything under the diagonal (colour white?)
-#corr_tib[corr_tib == 1] <- kableExtra::cell_spec(corr_tib[corr_tib == 1], color = "transparent")
+
 corr_tib_out <- corr_tib %>%
   column_to_rownames("var") %>%
   as.matrix() %>% 
   signif(digits = 2)
 cols <- 0
-for(i in 7:nrow(corr_tib)){ # terrible solution for formatting table
+for(i in 7:nrow(corr_tib)){ # remove repeated values
   cols <- cols + 1 
   corr_tib_out[i,1:cols] <- ""
 } 

diff --git a/inst/paper/paper.qmd b/inst/paper/paper.qmd
@@ -5,13 +5,13 @@
 
 library(here)
 library(readr)
+devtools::load_all()
 
 # upload data
 metadata <- read_tsv(here("inst/data/raw_data/metadata.tsv"))
 demography <- read_csv(here("inst/data/raw_data/demography.csv"))
 lloq <- read_tsv(here("inst/data/raw_data/lloq.tsv"))
 uhplc_data_comb <- read_csv(here("inst/data/derived_data/uhplc-data_combined.csv"))
-#uhplc_calculus <- read_csv(here("inst/data/derived_data/uhplc-calculus_cleaned.csv"))
 dental_inv <- read_csv(here("inst/data/raw_data/dental-inv.csv"))
 caries <- read_csv(here("inst/data/raw_data/caries.csv"))
 periodont <- read_csv(here("inst/data/raw_data/periodontitis.csv"))

diff --git a/inst/scripts/01-data_prep.R b/inst/scripts/01-data_prep.R
@@ -10,20 +10,12 @@ library(purrr)
 # Upload data -------------------------------------------------------------
 
 # See README for details
-metadata <- read_tsv("inst/data/raw_data/metadata.tsv") # ids need to be changed to match key
+metadata <- read_tsv("inst/data/raw_data/metadata.tsv")
 lloq <- read_tsv("inst/data/raw_data/lloq.tsv")
 uhplc_data_raw <- read_csv("inst/data/raw_data/uhplc-results.csv")
 uhplc_data_raw2 <- read_csv("inst/data/raw_data/uhplc-results_batch2.csv")
-dental_inv <- read_csv("inst/data/raw_data/dental-inv.csv")
 sinusitis <- read_csv("inst/data/raw_data/sinusitis.csv")
 path_cond <- read_csv("inst/data/raw_data/path-conditions.csv")
-caries <- read_csv("inst/data/raw_data/caries.csv")
-periodont <- read_csv("inst/data/raw_data/periodontitis.csv")
-periap <- read_csv("inst/data/raw_data/periapical.csv")
-calculus <- read_csv("inst/data/raw_data/calculus.csv")
-calculus_full <- read_csv("inst/data/raw_data/calculus_full.csv")
-demography <- read_csv("inst/data/raw_data/demography.csv")
-
 
 # CMS = chronic maxillary sinusitis
 # IPR = periosteal reaction on visceral surface of ribs
@@ -77,38 +69,6 @@ uhplc_data_comb <- uhplc_data_batch1 %>%
   #select(sample, contains("calc"))
 
 
-# Compounds presence/absence
-
-# uhplc_calculus_bin <- uhplc_calculus %>%
-#   mutate(
-#     across(
-#       where(is.numeric),
-#       function(x) if_else(!is.na(x), T, F) # convert concentration to boolean
-#       )
-#     ) %>%
-#   pivot_longer( # convert to long form to include only compounds present or absent in both batches
-#     cols = -sample,
-#     names_to = c("compound", "batch"),
-#     names_pattern = "(.*)_(.*)",
-#     values_to = c("value")
-#     ) %>%
-#     mutate(compound = str_remove(compound, "_calc"), # redundant suffix
-#            value = as.numeric(value)) %>% # convert to 0s and 1s
-#     group_by(sample, compound) %>%
-#     summarise(value = sum(value))
-
-# create data frame with replicated compounds and remove compounds absent in all individuals
-# uhplc_calculus_replicated <- uhplc_calculus_bin %>%
-#   group_by(id,compound) %>%
-#   summarise(presence = sum(presence)) %>%
-#   filter(presence == 0 | presence == 2) %>%
-#   #filter(sum(presence) != 0) #%>% # remove compounds absent in all individuals in sample
-#   mutate(presence = if_else(presence == 2, 1, 0)) %>% # convert replications to presence/absence
-#   pivot_wider(names_from = compound, values_from = presence) %>%
-#   mutate(across(where(is.numeric), replace_na, 0)) %>% # replace NAs with 0 (not ideal solution, may be reconsidered)
-#   left_join(select(metadata, id, sample), by = "id")
-
-
 # Sinusitis data
 
 sinusitis_clean <- sinusitis %>% # convert yes/no to true/false
@@ -126,61 +86,6 @@ path_cond_clean <- path_cond %>%
                           x == "N" ~ FALSE)
                 )
          )
-#
-# # dental data
-#
-# # Caries rate
-#
-# # convert caries data from location to number of caries and caries rate
-#
-# surface <- c("mes", "dis", "occ", "buc", "lin", "root", "crown") # here occ = incisal
-#
-# caries_rate <- caries %>%
-#   pivot_longer(t11:t48, names_to = "tooth",
-#                values_to = "caries_score") %>%
-#   na.omit() %>%
-#   separate_rows(caries_score, sep = ";") %>% # one lesion per row
-#   mutate(
-#     caries_count = if_else(
-#     caries_score == "none", 0L, 1L
-#     )
-#   ) %>%
-#   group_by(id) %>%
-#   summarise(
-#     n_teeth = n(),
-#     count = sum(caries_count, na.rm = T), # crude caries rate
-#     caries_rate = count / n_teeth
-#   ) %>%
-#   select(id, caries_rate)
-#
-# # Median periodontal status
-#
-# periodont_status <- periodont %>%
-#   mutate(periodont_status = apply(.[,-1], MARGIN = 1, FUN = median, na.rm = T)) %>%
-#   select(id, periodont_status)
-#
-# # number of periapical lesions
-#
-# periap_num <- periap %>%
-#   mutate(across(-id, ~ if_else(.x == "none", 0, 1))) %>%
-#   mutate(periap_num = apply(.[,-1], MARGIN = 1, FUN = sum, na.rm = T)) %>%
-#   select(id, periap_num)
-#
-# # calculus index
-#
-# calc_index <- calculus_full %>%
-#   dental_longer(id) %>%
-#   calculus_index(simple = T) %>%
-#   select(!c(n_surf, score_sum))
-#
-# # Prepare data for EFA ----------------------------------------------------
-#
-# data_list <- list(path_cond_clean, sinusitis_clean, caries_rate, periodont_status, periap_num, uhplc_calculus_replicated, calc_index)
-#
-# efa_data <- data_list %>%
-#   reduce(inner_join, by = "id") %>%
-#   mutate(across(where(is.logical), as.numeric)) %>%
-#   filter(complete.cases(.))
 
 
 # Export data -------------------------------------------------------------

diff --git a/inst/scripts/02_data-explore.R b/inst/scripts/02_data-explore.R