diff --git a/DESCRIPTION b/DESCRIPTION index 7967f15..cb8ba1e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: mb11CalculusPilot Title: Research compendium -Version: 0.0.3 +Version: 0.0.0.9000 Authors@R: person("First", "Last", , "first.last@example.com", role = c("aut", "cre")) Description: Research compendium for 'Multiproxy analysis exploring patterns of diet and disease in dental calculus and skeletal remains from a 19th century Dutch population'. diff --git a/inst/paper/_results.qmd b/inst/paper/_results.qmd index 49759ad..48a962c 100644 --- a/inst/paper/_results.qmd +++ b/inst/paper/_results.qmd @@ -7,7 +7,6 @@ For a full list of targeted compounds, see Supplementary Material. ```{r} #| label: detection-tbl-setup - uhplc_calculus_detect <- uhplc_calculus_long %>% #uhplc_calculus_detect <- uhplc_calculus_demography %>% #remove_missing() %>% @@ -115,13 +114,14 @@ quantity of compound extracted from the calculus ([@fig-detection-preservation]B ```{r} #| label: fig-detection-preservation #| fig-cap: "(A) Violin plot with overlaid box plots depicting the distribution of extracted quantities of each compound from batch 2 separated by state of preservation of the skeleton. (B) Extracted quantity (ng) of compound plotted against weights of the calculus samples from batch 2." +#| fig-width: 5 +#| fig-height: 7 preservation_plot <- uhplc_calculus_long %>% filter( !is.na(preservation), quant > 0, batch == "batch2", - #!compound %in% c("cbd", "cbn", "cocaine", "thc", "thca-a", "thcva"), # remove compounds not detected id != "MB329" # unusually high nicotine and cotinine quantity in batch 1 ) %>% ggplot(aes(x = preservation, y = quant)) + @@ -140,10 +140,8 @@ preservation_plot <- uhplc_calculus_long %>% weight_quant_plot <- uhplc_calculus_long %>% filter( - #!is.na(preservation), batch == "batch2", quant > 0, - #!compound %in% c("cbd", "cbn", "cocaine", "thc", "thca-a", "thcva"), # remove compounds not detected id != "MB329" # unusually high nicotine and cotinine quantity in batch 1 ) %>% ggplot(aes(x = weight, y = quant)) + @@ -153,28 +151,6 @@ weight_quant_plot <- uhplc_calculus_long %>% theme(axis.title.y = element_blank()) + labs(y = "Quantity (ng)", x = "Weight (mg)") -# weight_quant_plot <- uhplc_data_comb %>% -# select(sample, contains("batch2")) %>% -# select(sample, batch2_weight, contains("calc")) %>% -# pivot_longer( -# -c(sample, batch2_weight), -# names_to = c("compound", "batch"), -# names_pattern = "(.*)_(.*)", -# values_to = c("conc") -# ) %>% -# mutate(compound = str_remove(compound, "_calc")) %>% -# #remove_missing() %>% -# filter( -# conc > 0, -# id != "MB329" # unusually high nicotine and cotinine quantity in batch 1 -# ) %>% -# ggplot(aes(x = batch2_weight, y = conc)) + -# geom_point(col = viridisLite::viridis(1)) + -# facet_wrap(~ compound, scales = "free_y", labeller = labeller(compound = compound_names), dir = "v") + -# theme_bw() + -# theme(axis.title.y = element_blank()) + -# labs(y = "Quantity (ng)", x = "Weight (mg)") - preservation_plot + weight_quant_plot + plot_annotation(tag_levels = "A") ``` @@ -197,7 +173,6 @@ tobacco_accuracy <- tobacco %>% group_by(sample, id, .drop = F) %>% summarise( detection = sum(detection), - #compound = compound ) %>% left_join(select(demography, id, pipe_notch, preservation, age), by = "id") %>% mutate( @@ -266,8 +241,7 @@ and may have been contaminated. corr_tib <- conc_cor %>% as_tibble(rownames = "var") %>% filter(var != "age", - var != "periodont_status") #%>% - #mutate(across(everything(), signif, 3)) + var != "periodont_status") corr_tib_long <- corr_tib %>% pivot_longer( @@ -275,14 +249,13 @@ corr_tib_long <- corr_tib %>% names_to = "name", values_to = "corr" ) -# still need to remove everything under the diagonal (colour white?) -#corr_tib[corr_tib == 1] <- kableExtra::cell_spec(corr_tib[corr_tib == 1], color = "transparent") + corr_tib_out <- corr_tib %>% column_to_rownames("var") %>% as.matrix() %>% signif(digits = 2) cols <- 0 -for(i in 7:nrow(corr_tib)){ # terrible solution for formatting table +for(i in 7:nrow(corr_tib)){ # remove repeated values cols <- cols + 1 corr_tib_out[i,1:cols] <- "" } diff --git a/inst/paper/paper.qmd b/inst/paper/paper.qmd index 0824c09..0a77301 100644 --- a/inst/paper/paper.qmd +++ b/inst/paper/paper.qmd @@ -5,13 +5,13 @@ library(here) library(readr) +devtools::load_all() # upload data metadata <- read_tsv(here("inst/data/raw_data/metadata.tsv")) demography <- read_csv(here("inst/data/raw_data/demography.csv")) lloq <- read_tsv(here("inst/data/raw_data/lloq.tsv")) uhplc_data_comb <- read_csv(here("inst/data/derived_data/uhplc-data_combined.csv")) -#uhplc_calculus <- read_csv(here("inst/data/derived_data/uhplc-calculus_cleaned.csv")) dental_inv <- read_csv(here("inst/data/raw_data/dental-inv.csv")) caries <- read_csv(here("inst/data/raw_data/caries.csv")) periodont <- read_csv(here("inst/data/raw_data/periodontitis.csv")) diff --git a/inst/scripts/01-data_prep.R b/inst/scripts/01-data_prep.R index 6256d37..c67c14c 100644 --- a/inst/scripts/01-data_prep.R +++ b/inst/scripts/01-data_prep.R @@ -10,20 +10,12 @@ library(purrr) # Upload data ------------------------------------------------------------- # See README for details -metadata <- read_tsv("inst/data/raw_data/metadata.tsv") # ids need to be changed to match key +metadata <- read_tsv("inst/data/raw_data/metadata.tsv") lloq <- read_tsv("inst/data/raw_data/lloq.tsv") uhplc_data_raw <- read_csv("inst/data/raw_data/uhplc-results.csv") uhplc_data_raw2 <- read_csv("inst/data/raw_data/uhplc-results_batch2.csv") -dental_inv <- read_csv("inst/data/raw_data/dental-inv.csv") sinusitis <- read_csv("inst/data/raw_data/sinusitis.csv") path_cond <- read_csv("inst/data/raw_data/path-conditions.csv") -caries <- read_csv("inst/data/raw_data/caries.csv") -periodont <- read_csv("inst/data/raw_data/periodontitis.csv") -periap <- read_csv("inst/data/raw_data/periapical.csv") -calculus <- read_csv("inst/data/raw_data/calculus.csv") -calculus_full <- read_csv("inst/data/raw_data/calculus_full.csv") -demography <- read_csv("inst/data/raw_data/demography.csv") - # CMS = chronic maxillary sinusitis # IPR = periosteal reaction on visceral surface of ribs @@ -77,38 +69,6 @@ uhplc_data_comb <- uhplc_data_batch1 %>% #select(sample, contains("calc")) -# Compounds presence/absence - -# uhplc_calculus_bin <- uhplc_calculus %>% -# mutate( -# across( -# where(is.numeric), -# function(x) if_else(!is.na(x), T, F) # convert concentration to boolean -# ) -# ) %>% -# pivot_longer( # convert to long form to include only compounds present or absent in both batches -# cols = -sample, -# names_to = c("compound", "batch"), -# names_pattern = "(.*)_(.*)", -# values_to = c("value") -# ) %>% -# mutate(compound = str_remove(compound, "_calc"), # redundant suffix -# value = as.numeric(value)) %>% # convert to 0s and 1s -# group_by(sample, compound) %>% -# summarise(value = sum(value)) - -# create data frame with replicated compounds and remove compounds absent in all individuals -# uhplc_calculus_replicated <- uhplc_calculus_bin %>% -# group_by(id,compound) %>% -# summarise(presence = sum(presence)) %>% -# filter(presence == 0 | presence == 2) %>% -# #filter(sum(presence) != 0) #%>% # remove compounds absent in all individuals in sample -# mutate(presence = if_else(presence == 2, 1, 0)) %>% # convert replications to presence/absence -# pivot_wider(names_from = compound, values_from = presence) %>% -# mutate(across(where(is.numeric), replace_na, 0)) %>% # replace NAs with 0 (not ideal solution, may be reconsidered) -# left_join(select(metadata, id, sample), by = "id") - - # Sinusitis data sinusitis_clean <- sinusitis %>% # convert yes/no to true/false @@ -126,61 +86,6 @@ path_cond_clean <- path_cond %>% x == "N" ~ FALSE) ) ) -# -# # dental data -# -# # Caries rate -# -# # convert caries data from location to number of caries and caries rate -# -# surface <- c("mes", "dis", "occ", "buc", "lin", "root", "crown") # here occ = incisal -# -# caries_rate <- caries %>% -# pivot_longer(t11:t48, names_to = "tooth", -# values_to = "caries_score") %>% -# na.omit() %>% -# separate_rows(caries_score, sep = ";") %>% # one lesion per row -# mutate( -# caries_count = if_else( -# caries_score == "none", 0L, 1L -# ) -# ) %>% -# group_by(id) %>% -# summarise( -# n_teeth = n(), -# count = sum(caries_count, na.rm = T), # crude caries rate -# caries_rate = count / n_teeth -# ) %>% -# select(id, caries_rate) -# -# # Median periodontal status -# -# periodont_status <- periodont %>% -# mutate(periodont_status = apply(.[,-1], MARGIN = 1, FUN = median, na.rm = T)) %>% -# select(id, periodont_status) -# -# # number of periapical lesions -# -# periap_num <- periap %>% -# mutate(across(-id, ~ if_else(.x == "none", 0, 1))) %>% -# mutate(periap_num = apply(.[,-1], MARGIN = 1, FUN = sum, na.rm = T)) %>% -# select(id, periap_num) -# -# # calculus index -# -# calc_index <- calculus_full %>% -# dental_longer(id) %>% -# calculus_index(simple = T) %>% -# select(!c(n_surf, score_sum)) -# -# # Prepare data for EFA ---------------------------------------------------- -# -# data_list <- list(path_cond_clean, sinusitis_clean, caries_rate, periodont_status, periap_num, uhplc_calculus_replicated, calc_index) -# -# efa_data <- data_list %>% -# reduce(inner_join, by = "id") %>% -# mutate(across(where(is.logical), as.numeric)) %>% -# filter(complete.cases(.)) # Export data ------------------------------------------------------------- diff --git a/inst/scripts/02_data-explore.R b/inst/scripts/02_data-explore.R deleted file mode 100644 index b3f5f27..0000000 --- a/inst/scripts/02_data-explore.R +++ /dev/null @@ -1,69 +0,0 @@ -# Soft-deprecated -# should this all be done in the report? -library(readr) -library(dplyr) -library(tidyr) -library(purrr) - -# Upload data ------------------------------------------------------------- - -metadata <- read_tsv("inst/data/raw_data/metadata.tsv") -lloq <- read_tsv("inst/data/raw_data/lloq.tsv") -uhplc_calculus <- read_tsv("inst/data/derived_data/uhplc-data_cleaned.csv") -dental_inv <- read_csv("inst/data/raw_data/dental-inv.csv") -caries <- read_csv("inst/data/raw_data/caries.csv") -periodont <- read_csv("inst/data/raw_data/periodontitis.csv") -periap <- read_csv("inst/data/raw_data/periapical.csv") -calculus <- read_csv("inst/data/raw_data/calculus.csv") -demography <- read_csv("inst/data/raw_data/demography.csv") -sinusitis <- read_csv("inst/data/derived_data/sinusitis_cleaned.csv") - - -# Convert to long --------------------------------------------------------- - -dental_inv_long <- dental_inv %>% # Inventory - right_join(demography) %>% - select(!c(occupation, own_grave, tax, pipe_notch)) %>% - pivot_longer(t11:t48, names_to = "tooth", - values_to = "status") %>% - mutate(status = case_when(status == "dna" ~ "m", # teeth missing due to DNA sampling - TRUE ~ status)) - -teeth_list <- list(caries, periodont, periap, calculus) - -test<- lapply( - teeth_list, - pivot_longer, - cols = t11:t48, - names_to = "tooth", - values_to = "score" -) - -caries_long <- caries %>% # Caries - pivot_longer(t11:t48, names_to = "tooth", - values_to = "caries_score") - -periodont_long <- periodont %>% # Periodontitis - pivot_longer(cols = t11:t48, - names_to = "tooth", - values_to = "periodont_score") - -periap_long <- periap %>% # Periapical lesions - pivot_longer(cols = t11:t48, - names_to = "tooth", - values_to = "periap_score") - -calculus_long <- calculus %>% # Calculus - pivot_longer(cols = t11:t48, - names_to = "tooth", - values_to = "calculus_score") - -# prepare to merge data frames -teeth_list <- list(dental_inv_long, caries_long, periodont_long, - periap_long, calculus_long) - -# Long dental frame - -dental_long <- teeth_list %>% - reduce(inner_join, by = c("id", "tooth")) %>% - tooth_position() diff --git a/inst/scripts/03_data-analyse.R b/inst/scripts/03_data-analyse.R deleted file mode 100644 index d1828d4..0000000 --- a/inst/scripts/03_data-analyse.R +++ /dev/null @@ -1,104 +0,0 @@ -# data analysis -library(psych) -library(dplyr) -library(readr) -library(corrplot) - -# Upload data ------------------------------------------------------------- - -#efa_data <- read_csv("inst/data/derived_data/efa-analysis_data.csv") - -# combine nicotine and cotinine to tobacco -efa_comb <- efa_data %>% - mutate(tobacco = case_when(nicotine == 1 | cotinine == 1 ~ 1, - TRUE ~ 0)) %>% - select(!c(nicotine, cotinine)) - -# Exploratory factor analysis (psych package?) - - # tetrachoric correlation for dichotomous variables - - -# Missing values - - - -# selection of variables (remove very low and high frequencies) - -# dichotomise caries and calculus ratios for polychoric correlation - # caries ratios falling below the 25th percentile are considered absent - # discretisation of caries and calculus -caries_discrete <- quantile(caries_rate$caries_rate) -calculus_discrete <- quantile(calc_index$calc_index) - -caries_bin <- caries_rate %>% - mutate(caries_rate = case_when(caries_rate > quantile(caries_rate)[2] ~ 1, - TRUE ~ 0 - )) - - -efa_filter <- efa_comb %>% - select(!c(OD)) %>% # vertebral osteophytosis highly correlated with OA - summarise(across(where(is.numeric), function(x) sum(x) / length(x))) %>% - select(which(colSums(.) > 0.1)) - -efa_select <- efa_comb %>% - select(names(efa_filter)) %>% - select(!c(periap_num)) %>% - filter(complete.cases(.)) #%>% - as.matrix() - -efa_discrete <- efa_select %>% - mutate( - calc_index = case_when( - calc_index <= calculus_discrete[1] ~ 0, - calc_index >= calculus_discrete[1] & calc_index < calculus_discrete[2] ~ 1, - calc_index >= calculus_discrete[2] & calc_index < calculus_discrete[3] ~ 2, - calc_index >= calculus_discrete[3] & calc_index < calculus_discrete[4] ~ 3, - calc_index >= calculus_discrete[4] & calc_index <= calculus_discrete[5] ~ 4, - ), - caries_rate = case_when( - caries_rate <= caries_discrete[1] ~ 0, - caries_rate >= caries_discrete[1] & caries_rate < caries_discrete[2] ~ 1, - caries_rate >= caries_discrete[2] & caries_rate < caries_discrete[3] ~ 2, - caries_rate >= caries_discrete[3] & caries_rate < caries_discrete[4] ~ 3, - caries_rate >= caries_discrete[4] & caries_rate <= caries_discrete[5] ~ 4, - ) - ) %>% - as.matrix() - - - -KMO(cor(efa_discrete)) # lower than suggested 0.6, maybe factor analysis not good -cortest.bartlett(efa_discrete) # small value, factor analysis okay - - - -# Polychoric correlation - -cor(efa_select)[,c("caries_rate","calc_index")] # for comparing caries and calculus with compounds before dichotomisation - -polycorr <- polychoric(efa_discrete) - -corrplot(polycorr$rho, method = "number", type = "lower", diag = F) - -# Factor analysis --------------------------------------------------------- - -efa_init <- fa(efa_discrete, nfactors = ncol(efa_discrete), rotate = "none") -eigenval <- efa_init$e.values -data.frame( - n_fact = as.factor(1:length(eigenval)), - eigenval - ) %>% - ggplot(aes(x = n_fact, y = eigenval, group = 1)) + - geom_line() + - geom_point() - -efa_analysis <- fa(efa_select) -fa.diagram(efa_analysis) - - - -# Multiple correspondence analysis ---------------------------------------- - - diff --git a/inst/scripts/new-explore.R b/inst/scripts/new-explore.R deleted file mode 100644 index fc595aa..0000000 --- a/inst/scripts/new-explore.R +++ /dev/null @@ -1,118 +0,0 @@ -library(tidyverse) -library(here) -source(here("scripts/dental-plot.R")) - -dental_long <- readr::read_csv(here("data/derived_data/dental-full_long.csv")) - - - -# Data wrangling ---------------------------------------------------------- - -# Caries rate - -# convert caries data from location to number of caries and caries rate -# number of lesions per tooth observed -# separate anterior and posterior teeth -surface <- c("mes", "dis", "occ", "buc", "lin", "root", "crown") # here occ = incisal - -caries_rate <- dental_long %>% - filter(status == "p") %>% - separate_rows(caries_score, sep = ";") %>% # one lesion per row - mutate(caries_count = if_else( - caries_score == "none", 0L, 1L - ) - ) %>% - group_by(id, tooth_type, position) %>% - summarise( - n_teeth = n(), - count = sum(caries_count, na.rm = T), - rate = count / n_teeth - ) - - - # mutate(caries_count = if_else(caries_score == "none", 0L, - # stringi::stri_count_words(caries_score) - # ) - # ) - -#caries_rate <- -# caries_long %>% -# group_by(id, position) %>% -# filter(status == "p") %>% -# summarise(n_teeth = n(), -# count = sum(caries_count, na.rm = T), -# rate = count / n_teeth -# ) %>% -# ungroup() -#filter(status == "p") %>% -# summarise(caries_count = sum(caries_count, na.rm = T)) %>% #view -# group_by(id) %>% -# summarise(n_teeth = n(), -# caries_rate = sum(caries_count, na.rm = T) / n_teeth) - -# caries_rate_type <- dental_long %>% -# group_by(tooth_type) %>% -# summarise(n_teeth = sum(!is.na(caries_count)), -# n_caries = sum(caries_count, na.rm = T), -# caries_rate = n_caries / n_teeth) # shouldn't be any NA?? - -# CARIES RATE CALCULATION - -# Caries longform data with row for each surface of each tooth? - -# convert periodontitis and calculus to mean/median score? - - - -#full_data_long - -# compare replicated samples across batch 1 and 2 - -caffeine_rep <- uhplc_data_comb %>% - select(sample, contains("caffeine")) %>% - select(sample, contains("calc")) %>% - summarise(caffeine = sum(caffeine_calc_batch1, caffeine_calc_batch2)) %>% - mutate(caffeine = if_else(!is.na(caffeine), "yes", "no")) -# mutate(caffeine = rowSums()) %>% -# select(sample, caffeine) - -replicates <- data.frame("sample" = uhplc_data_batch2$sample) - -tobacco <- compounds %>% - #na.omit() %>% - filter(compound %in% c("nicotine", "cotinine")) %>% - inner_join(demography, by = "id") -# Summary data frames ----------------------------------------------------- - -# Central tendency of dental lesions per individual - - - -# Plots ------------------------------------------------------------------- - -# types of lesions -dental_long %>% - filter(!is.na(caries_score), - caries_score != "none", - caries_score != "root?") %>% - separate_rows(caries_score, sep = ";") %>% - dental_plot(fill = caries_score) - -# overall distribution of dental lesions in sample - -# dental_long %>% -# dental_plot(caries_count) -# -# dental_long %>% -# dental_plot(periodont_score) -# -# dental_long %>% -# dental_plot(periap_score) - -## this plot is essentially useless since I targeted individuals with calculus -# on lower incisors -# dental_long %>% -# dental_plot(calculus_score) - -# distribution of - diff --git a/inst/scripts/new-prep.R b/inst/scripts/new-prep.R deleted file mode 100644 index 8df4e75..0000000 --- a/inst/scripts/new-prep.R +++ /dev/null @@ -1,185 +0,0 @@ -library(tidyverse) -library(here) - -# Upload data ------------------------------------------------------------- - -# See README for details -lloq <- readr::read_tsv(here("data/lloq.tsv")) -uhplc_data_raw <- readr::read_csv(here("data/raw/anon/uhplc-results.csv")) -uhplc_data_raw2 <- readr::read_csv(here("data/raw/anon/uhplc-results_batch2.csv")) -dental_inv <- readr::read_csv(here("data/raw/anon/dental-inv.csv")) -caries <- readr::read_csv(here("data/raw/anon/caries.csv")) -periodont <- readr::read_csv(here("data/raw/anon/periodontitis.csv")) -periap <- readr::read_csv(here("data/raw/anon/periapical.csv")) -calculus <- readr::read_csv(here("data/raw/anon/calculus.csv")) -demography <- readr::read_csv(here("data/raw/anon/demography.csv")) -sinusitis <- readr::read_csv(here("data/raw/anon/sinusitis.csv"), na = "UNOBS") -# CMS = chronic maxillary sinusitis -# IPR = periosteal reaction on visceral surface of ribs - # present only if active (pulmonary infection) - -# Helper objects ---------------------------------------------------------- - -# vectors to categorise teeth by position and type (FDI notation) -maxilla <- c(paste0("t", 11:18), paste0("t", 21:28)) -mandible <- c(paste0("t", 31:38), paste0("t", 41:48)) -left <- c(paste0("t", 21:28), paste0("t", 31:38)) -posterior <- c(paste0("t", 14:18), paste0("t", 24:28), - paste0("t", 34:38), paste0("t", 44:48)) -molar <- c(paste0("t", 16:18), paste0("t", 26:28), - paste0("t", 36:38), paste0("t", 46:48)) -incisor <- c(paste0("t", 11:12), paste0("t", 21:22), - paste0("t", 31:32), paste0("t", 41:42)) -premolar <- c(paste0("t", 14:15), paste0("t", 24:25), - paste0("t", 34:35), paste0("t", 44:45)) -canine <- c(paste0("t", 13), paste0("t", 23), - paste0("t", 33), paste0("t", 43)) - -# Data cleaning ----------------------------------------------------------- - -# UHPLC-MS/MS results - -uhplc_data <- uhplc_data_raw %>% - filter(sample != "16b") %>% # sample 16b removed (test sample that differs from 16 in sampling location) - group_by(sample) %>% - mutate(across(weight:cocaine_calculus, sum)) %>% # combine 12.1 and 12.2 (split samples from same sampling location) - filter(sample != "12.2") %>% - rename(theophyl_calc = thyephyl_calc) #%>% - -uhplc_data$sample[uhplc_data$sample == "12.1"] <- "12" # change value to 12 -uhplc_data$sample <- as.factor(uhplc_data$sample) - -write_csv(uhplc_data, here("data/derived_data/uhplc-data_cleaned.csv")) -# write cleaned data to data/derived_data - -id_sample_key <- uhplc_data %>% - select(id, sample) - -uhplc_data_batch2 <- uhplc_data_raw2 %>% - rename(theophyl_calc = theophylline_calc) %>% - mutate(sample = as_factor(sample)) - -write_csv(uhplc_data_batch2, here("data/derived_data/uhplc-data-batch2_cleaned.csv")) -# write cleaned data to data/derived_data - -# Combine batch 1 and 2 - -uhplc_data_comb <- uhplc_data %>% - full_join(uhplc_data_batch2, by = "sample", suffix = c("_batch1", "_batch2")) -names(uhplc_data_comb) - -uhplc_long_batch1 <- uhplc_data %>% - pivot_longer(cols = caffeine_wash1:cocaine_calculus, - names_to = c("compound", "wash"), names_sep = "_", - values_to = "quantity") %>% # combine wash1-3 into single column - select(!c(weight, weight_wash1, weight_wash2, weight_wash3)) %>% - mutate(batch = 1) - -uhplc_long_batch2 <- uhplc_data_batch2 %>% - semi_join(id_sample_key) %>% - pivot_longer(cols = caffeine_wash1:cocaine_calc, - names_to = c("compound", "wash"), names_sep = "_", - values_to = "quantity") %>% # combine wash1-3 into single column - select(!c(weight, weight_wash1, weight_wash2, weight_wash3)) %>% - mutate(batch = 2) - -uhplc_long <- uhplc_long_batch1 %>% - bind_rows(uhplc_long_batch2) - -## NEED TO CALCULATE CONCENTRATION BEFORE COMBINING (although concentration is not really relevant) ## - -compounds_batch1 <- uhplc_data %>% - select(!contains(c("wash", "weight"))) %>% - pivot_longer(cols = where(is.numeric), - names_to = "compound", values_to = "quant") %>% - mutate(batch = 1) %>% - mutate(compound = str_replace(compound, "_calc", "")) - -compounds_batch2 <- uhplc_data_batch2 %>% - select(!contains(c("wash", "weight"))) %>% - pivot_longer(cols = caffeine_calc:cocaine_calc, - names_to = "compound", values_to = "quant") %>% - mutate(batch = 2) %>% - mutate(compound = str_replace(compound, "_calc", "")) - -# combined data frame of compound detection across both batches -compounds <- compounds_batch2 %>% - left_join(id_sample_key) %>% - bind_rows(compounds_batch1) %>% - mutate(detection = if_else(is.na(quant), FALSE, TRUE)) # presence/absence variable - - -# Calculate concentration (ng / mg) of each compound in the calculus - -uhplc_data_conc <- uhplc_data %>% - select(!contains(c("_wash1", "_wash2"))) %>% - mutate(across(contains("_calc"), ~ .x / weight_wash3)) %>% - select(!contains("_wash3")) - -sinusitis <- sinusitis %>% - mutate(across(CMS:IPR, - function(x) case_when(x == "YES" ~ TRUE, - x == "NO" ~ FALSE))) - -# Dental data ------------------------------------------------------------- - -# convert to long form - -dental_inv_long <- dental_inv %>% # Inventory - right_join(demography) %>% - select(!c(occupation, own_grave, tax, pipe_notch)) %>% - pivot_longer(t11:t48, names_to = "tooth", - values_to = "status") %>% - mutate(status = case_when(status == "dna" ~ "m", # teeth missing due to DNA sampling - TRUE ~ status)) - -caries_long <- caries %>% # Caries - pivot_longer(t11:t48, names_to = "tooth", - values_to = "caries_score") - -periodont_long <- periodont %>% # Periodontitis - pivot_longer(cols = t11:t48, - names_to = "tooth", - values_to = "periodont_score") - -periap_long <- periap %>% # Periapical lesions - pivot_longer(cols = t11:t48, - names_to = "tooth", - values_to = "periap_score") - -calculus_long <- calculus %>% # Calculus - pivot_longer(cols = t11:t48, - names_to = "tooth", - values_to = "calculus_score") - -# Combined data ----------------------------------------------------------- - -# prepare to merge data frames -teeth_list <- list(dental_inv_long, caries_long, periodont_long, - periap_long, calculus_long) - -# Long dental frame - -dental_long <- teeth_list %>% - reduce(inner_join, by = c("id", "tooth")) %>% - mutate(region = if_else(tooth %in% maxilla, "maxilla", "mandible"), - side = if_else(tooth %in% left, "left", "right"), - position = if_else(tooth %in% posterior, "posterior", "anterior"), - tooth_type = case_when(tooth %in% incisor ~ "incisor", - tooth %in% canine ~ "canine", - tooth %in% premolar ~ "premolar", - tooth %in% molar ~ "molar"), - .before = status) %>% - mutate(comb_sex = case_when(sex == "pm" ~ "m", - sex == "pf" ~ "f", - TRUE ~ sex)) - -write_csv(dental_long, here("data/derived_data/dental-full_long.csv")) - -# data frame with one row per individual - # one column per dental disease - # one column for presence/absence of compound - -# all_data_wide <- caries_rate %>% -# left_join(aml_rate) - diff --git a/inst/scripts/setup-qmd.R b/inst/scripts/setup-qmd.R index 1691ed9..0ae6b45 100644 --- a/inst/scripts/setup-qmd.R +++ b/inst/scripts/setup-qmd.R @@ -1,5 +1,3 @@ -library(here) -library(readr) library(dplyr) library(tidyr) library(tibble) @@ -10,7 +8,6 @@ library(glue) library(purrr) library(psych) library(patchwork) -devtools::load_all() try(generate_bib()) @@ -71,45 +68,6 @@ uhplc_calculus_long <- uhplc_data_long %>% left_join(demography, by = "id") %>% mutate(preservation = factor(preservation, levels = c("fair", "good", "excellent"))) -# uhplc_calculus_demography <- uhplc_calculus_long %>% -# left_join(demography) %>% -# mutate(preservation = factor(preservation, levels = c("fair", "good", "excellent"))) - -# presence/absence data frame -uhplc_calculus_bin <- uhplc_calculus_long %>% - mutate(presence = if_else(quant > 0, 1, quant)) - -# successfully replicated samples only -uhplc_calculus_replicated <- uhplc_calculus_bin %>% - mutate(compound = str_remove(compound, "_calc")) %>% - group_by(id, sample, compound) %>% # combine batches - summarise(presence = sum(presence)) #%>% - # filter(presence == 0 | presence == 2) %>% - # group_by(compound) %>% - # mutate(presence = if_else(presence == 2, 1, 0)) # convert replications to presence/absence - -uhplc_calculus_replicated <- uhplc_calculus_bin %>% - filter(id %in% filter(metadata, replicated == TRUE)$id) %>% - #right_join(select(filter(metadata, replicated == TRUE)), by = c("id", "sample")) - #mutate(compound = str_remove(compound, "_calc")) %>% - group_by(id, sample, compound) %>% # combine batches - summarise(presence = sum(presence)) %>% - filter(presence == 0 | presence == 2) %>% # remove compounds only detected in one batch - group_by(compound) %>% - mutate(presence = if_else(presence == 0, 0, 1)) %>% # convert replications to presence/absence - ungroup() - -uhplc_replicated_wide <- uhplc_calculus_replicated %>% - mutate(compound = case_when(compound == "nicotine" ~ "tobacco", - compound == "cotinine" ~ "tobacco", - TRUE ~ compound)) %>% - group_by(id, sample, compound) %>% - summarise(presence = sum(presence)) %>% # combine nicotine and cotinine - #remove_missing() %>% - mutate(presence = case_when(presence > 0 ~ TRUE, - TRUE ~ FALSE)) %>% - pivot_wider(names_from = "compound", values_from = "presence") - uhplc_conc_wide <- uhplc_calculus_long %>% filter(batch == "batch2") %>% #left_join(select(metadata, id, batch2_weight)) %>% @@ -353,29 +311,4 @@ polycorr_tib <- polycorr$rho %>% # ) %>% # filter(corr != 1) -# objects with correlation statements about variables - -# if(nrow(filter(polycorr_tib, strength == "strong")) == 0){ -# strong_correlations <- "No strong correlations were found" -# } else { -# strong_correlations <- polycorr_tib %>% -# filter(strength == "strong") %>% -# #distinct(rho, .keep_all = T) %>% -# slice(seq(from = 2, to = nrow(.), by = 2)) %>% # awkward solution to distinct not working -# mutate(statement = glue("{var} and {name} ({signif(corr, 3)})")) %>% -# .$statement %>% -# paste(collapse = ", ") -# } -# -# if(nrow(filter(polycorr_tib, strength == "moderate")) == 0){ -# moderate_correlations <- "No moderate correlations were found" -# } else { -# moderate_correlations <- polycorr_tib %>% -# filter(strength == "moderate") %>% -# arrange(desc(abs(corr))) %>% -# slice(seq(from = 2, to = nrow(.), by = 2)) %>% # awkward solution to distinct not working -# #distinct(rho, .keep_all = TRUE) %>% # not working -# mutate(statement = glue("{var} and {name} ({signif(corr, 3)})")) %>% -# .$statement %>% -# paste(collapse = ", ") -# } +