Skip to content

Commit

Permalink
clean up code
Browse files Browse the repository at this point in the history
  • Loading branch information
bbartholdy committed Feb 7, 2023
1 parent df4f3bf commit 3268c77
Show file tree
Hide file tree
Showing 9 changed files with 9 additions and 674 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: mb11CalculusPilot
Title: Research compendium
Version: 0.0.3
Version: 0.0.0.9000
Authors@R:
person("First", "Last", , "first.last@example.com", role = c("aut", "cre"))
Description: Research compendium for 'Multiproxy analysis exploring patterns of diet and disease in dental calculus and skeletal remains from a 19th century Dutch population'.
Expand Down
37 changes: 5 additions & 32 deletions inst/paper/_results.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ For a full list of targeted compounds, see Supplementary Material.

```{r}
#| label: detection-tbl-setup
uhplc_calculus_detect <- uhplc_calculus_long %>%
#uhplc_calculus_detect <- uhplc_calculus_demography %>%
#remove_missing() %>%
Expand Down Expand Up @@ -115,13 +114,14 @@ quantity of compound extracted from the calculus ([@fig-detection-preservation]B
```{r}
#| label: fig-detection-preservation
#| fig-cap: "(A) Violin plot with overlaid box plots depicting the distribution of extracted quantities of each compound from batch 2 separated by state of preservation of the skeleton. (B) Extracted quantity (ng) of compound plotted against weights of the calculus samples from batch 2."
#| fig-width: 5
#| fig-height: 7
preservation_plot <- uhplc_calculus_long %>%
filter(
!is.na(preservation),
quant > 0,
batch == "batch2",
#!compound %in% c("cbd", "cbn", "cocaine", "thc", "thca-a", "thcva"), # remove compounds not detected
id != "MB329" # unusually high nicotine and cotinine quantity in batch 1
) %>%
ggplot(aes(x = preservation, y = quant)) +
Expand All @@ -140,10 +140,8 @@ preservation_plot <- uhplc_calculus_long %>%
weight_quant_plot <- uhplc_calculus_long %>%
filter(
#!is.na(preservation),
batch == "batch2",
quant > 0,
#!compound %in% c("cbd", "cbn", "cocaine", "thc", "thca-a", "thcva"), # remove compounds not detected
id != "MB329" # unusually high nicotine and cotinine quantity in batch 1
) %>%
ggplot(aes(x = weight, y = quant)) +
Expand All @@ -153,28 +151,6 @@ weight_quant_plot <- uhplc_calculus_long %>%
theme(axis.title.y = element_blank()) +
labs(y = "Quantity (ng)", x = "Weight (mg)")
# weight_quant_plot <- uhplc_data_comb %>%
# select(sample, contains("batch2")) %>%
# select(sample, batch2_weight, contains("calc")) %>%
# pivot_longer(
# -c(sample, batch2_weight),
# names_to = c("compound", "batch"),
# names_pattern = "(.*)_(.*)",
# values_to = c("conc")
# ) %>%
# mutate(compound = str_remove(compound, "_calc")) %>%
# #remove_missing() %>%
# filter(
# conc > 0,
# id != "MB329" # unusually high nicotine and cotinine quantity in batch 1
# ) %>%
# ggplot(aes(x = batch2_weight, y = conc)) +
# geom_point(col = viridisLite::viridis(1)) +
# facet_wrap(~ compound, scales = "free_y", labeller = labeller(compound = compound_names), dir = "v") +
# theme_bw() +
# theme(axis.title.y = element_blank()) +
# labs(y = "Quantity (ng)", x = "Weight (mg)")
preservation_plot + weight_quant_plot + plot_annotation(tag_levels = "A")
```

Expand All @@ -197,7 +173,6 @@ tobacco_accuracy <- tobacco %>%
group_by(sample, id, .drop = F) %>%
summarise(
detection = sum(detection),
#compound = compound
) %>%
left_join(select(demography, id, pipe_notch, preservation, age), by = "id") %>%
mutate(
Expand Down Expand Up @@ -266,23 +241,21 @@ and may have been contaminated.
corr_tib <- conc_cor %>%
as_tibble(rownames = "var") %>%
filter(var != "age",
var != "periodont_status") #%>%
#mutate(across(everything(), signif, 3))
var != "periodont_status")
corr_tib_long <- corr_tib %>%
pivot_longer(
-var,
names_to = "name",
values_to = "corr"
)
# still need to remove everything under the diagonal (colour white?)
#corr_tib[corr_tib == 1] <- kableExtra::cell_spec(corr_tib[corr_tib == 1], color = "transparent")
corr_tib_out <- corr_tib %>%
column_to_rownames("var") %>%
as.matrix() %>%
signif(digits = 2)
cols <- 0
for(i in 7:nrow(corr_tib)){ # terrible solution for formatting table
for(i in 7:nrow(corr_tib)){ # remove repeated values
cols <- cols + 1
corr_tib_out[i,1:cols] <- ""
}
Expand Down
2 changes: 1 addition & 1 deletion inst/paper/paper.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@
library(here)
library(readr)
devtools::load_all()
# upload data
metadata <- read_tsv(here("inst/data/raw_data/metadata.tsv"))
demography <- read_csv(here("inst/data/raw_data/demography.csv"))
lloq <- read_tsv(here("inst/data/raw_data/lloq.tsv"))
uhplc_data_comb <- read_csv(here("inst/data/derived_data/uhplc-data_combined.csv"))
#uhplc_calculus <- read_csv(here("inst/data/derived_data/uhplc-calculus_cleaned.csv"))
dental_inv <- read_csv(here("inst/data/raw_data/dental-inv.csv"))
caries <- read_csv(here("inst/data/raw_data/caries.csv"))
periodont <- read_csv(here("inst/data/raw_data/periodontitis.csv"))
Expand Down
97 changes: 1 addition & 96 deletions inst/scripts/01-data_prep.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,20 +10,12 @@ library(purrr)
# Upload data -------------------------------------------------------------

# See README for details
metadata <- read_tsv("inst/data/raw_data/metadata.tsv") # ids need to be changed to match key
metadata <- read_tsv("inst/data/raw_data/metadata.tsv")
lloq <- read_tsv("inst/data/raw_data/lloq.tsv")
uhplc_data_raw <- read_csv("inst/data/raw_data/uhplc-results.csv")
uhplc_data_raw2 <- read_csv("inst/data/raw_data/uhplc-results_batch2.csv")
dental_inv <- read_csv("inst/data/raw_data/dental-inv.csv")
sinusitis <- read_csv("inst/data/raw_data/sinusitis.csv")
path_cond <- read_csv("inst/data/raw_data/path-conditions.csv")
caries <- read_csv("inst/data/raw_data/caries.csv")
periodont <- read_csv("inst/data/raw_data/periodontitis.csv")
periap <- read_csv("inst/data/raw_data/periapical.csv")
calculus <- read_csv("inst/data/raw_data/calculus.csv")
calculus_full <- read_csv("inst/data/raw_data/calculus_full.csv")
demography <- read_csv("inst/data/raw_data/demography.csv")


# CMS = chronic maxillary sinusitis
# IPR = periosteal reaction on visceral surface of ribs
Expand Down Expand Up @@ -77,38 +69,6 @@ uhplc_data_comb <- uhplc_data_batch1 %>%
#select(sample, contains("calc"))


# Compounds presence/absence

# uhplc_calculus_bin <- uhplc_calculus %>%
# mutate(
# across(
# where(is.numeric),
# function(x) if_else(!is.na(x), T, F) # convert concentration to boolean
# )
# ) %>%
# pivot_longer( # convert to long form to include only compounds present or absent in both batches
# cols = -sample,
# names_to = c("compound", "batch"),
# names_pattern = "(.*)_(.*)",
# values_to = c("value")
# ) %>%
# mutate(compound = str_remove(compound, "_calc"), # redundant suffix
# value = as.numeric(value)) %>% # convert to 0s and 1s
# group_by(sample, compound) %>%
# summarise(value = sum(value))

# create data frame with replicated compounds and remove compounds absent in all individuals
# uhplc_calculus_replicated <- uhplc_calculus_bin %>%
# group_by(id,compound) %>%
# summarise(presence = sum(presence)) %>%
# filter(presence == 0 | presence == 2) %>%
# #filter(sum(presence) != 0) #%>% # remove compounds absent in all individuals in sample
# mutate(presence = if_else(presence == 2, 1, 0)) %>% # convert replications to presence/absence
# pivot_wider(names_from = compound, values_from = presence) %>%
# mutate(across(where(is.numeric), replace_na, 0)) %>% # replace NAs with 0 (not ideal solution, may be reconsidered)
# left_join(select(metadata, id, sample), by = "id")


# Sinusitis data

sinusitis_clean <- sinusitis %>% # convert yes/no to true/false
Expand All @@ -126,61 +86,6 @@ path_cond_clean <- path_cond %>%
x == "N" ~ FALSE)
)
)
#
# # dental data
#
# # Caries rate
#
# # convert caries data from location to number of caries and caries rate
#
# surface <- c("mes", "dis", "occ", "buc", "lin", "root", "crown") # here occ = incisal
#
# caries_rate <- caries %>%
# pivot_longer(t11:t48, names_to = "tooth",
# values_to = "caries_score") %>%
# na.omit() %>%
# separate_rows(caries_score, sep = ";") %>% # one lesion per row
# mutate(
# caries_count = if_else(
# caries_score == "none", 0L, 1L
# )
# ) %>%
# group_by(id) %>%
# summarise(
# n_teeth = n(),
# count = sum(caries_count, na.rm = T), # crude caries rate
# caries_rate = count / n_teeth
# ) %>%
# select(id, caries_rate)
#
# # Median periodontal status
#
# periodont_status <- periodont %>%
# mutate(periodont_status = apply(.[,-1], MARGIN = 1, FUN = median, na.rm = T)) %>%
# select(id, periodont_status)
#
# # number of periapical lesions
#
# periap_num <- periap %>%
# mutate(across(-id, ~ if_else(.x == "none", 0, 1))) %>%
# mutate(periap_num = apply(.[,-1], MARGIN = 1, FUN = sum, na.rm = T)) %>%
# select(id, periap_num)
#
# # calculus index
#
# calc_index <- calculus_full %>%
# dental_longer(id) %>%
# calculus_index(simple = T) %>%
# select(!c(n_surf, score_sum))
#
# # Prepare data for EFA ----------------------------------------------------
#
# data_list <- list(path_cond_clean, sinusitis_clean, caries_rate, periodont_status, periap_num, uhplc_calculus_replicated, calc_index)
#
# efa_data <- data_list %>%
# reduce(inner_join, by = "id") %>%
# mutate(across(where(is.logical), as.numeric)) %>%
# filter(complete.cases(.))


# Export data -------------------------------------------------------------
Expand Down
69 changes: 0 additions & 69 deletions inst/scripts/02_data-explore.R

This file was deleted.

Loading

0 comments on commit 3268c77

Please sign in to comment.