Skip to content

Commit

Permalink
Merge pull request #37 from mdnestor/dev
Browse files Browse the repository at this point in the history
Bugfixes to Motrpac tables.
  • Loading branch information
vladpetyuk authored Apr 6, 2021
2 parents e50afd2 + d719444 commit fb6a3bf
Showing 1 changed file with 35 additions and 24 deletions.
59 changes: 35 additions & 24 deletions R/motrpac_bic_funtions.R
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ make_rii_peptide_ph <- function(msnid, masic_data, fractions, samples, reference
mutate(Reference = 1)

## Create crosstab
aggregation_level <- c("accession", "peptide", "SiteID")
aggregation_level <- c("accession", "Peptide", "SiteID")
crosstab <- create_crosstab(msnid,
masic_data,
aggregation_level,
Expand All @@ -209,7 +209,8 @@ make_rii_peptide_ph <- function(msnid, masic_data, fractions, samples, reference
select(Specie) %>%
mutate(protein_id = sub("(^.*)@(.*)@(.*)", "\\1", Specie),
sequence = sub("(^.*)@(.*)@(.*)", "\\2", Specie),
ptm_id = sub("(^.*)@(.*)@(.*)", "\\3", Specie),
ptm_id = sub("(^.*)@(.*)@(.*)", "\\3", Specie)) %>%
mutate(ptm_peptide = paste(ptm_id, sequence, sep=sep),
organism_name = org_name) %>%
mutate(REFSEQ = sub("(^.*)\\.\\d+", "\\1", protein_id))

Expand Down Expand Up @@ -243,7 +244,8 @@ make_rii_peptide_ph <- function(msnid, masic_data, fractions, samples, reference
is_contaminant = grepl("Contaminant", protein_id))

rii_peptide <- inner_join(rii_peptide, ids) %>%
mutate(ptm_id = gsub("-", sep, ptm_id))
mutate(ptm_id = gsub("-", sep, ptm_id),
ptm_peptide = gsub("-", sep, ptm_peptide))

## Join with crosstab
rii_peptide <- inner_join(rii_peptide, crosstab) %>%
Expand Down Expand Up @@ -285,12 +287,12 @@ make_results_ratio_ph <- function(msnid, masic_data, fractions, samples,
## Additional info from MS/MS
ids <- psms(msnid) %>%
select(accession, peptide, SiteID,
redundantAccessions, flankingSequence,
noninferableProteins, flankingSequence,
MSGFDB_SpecEValue, maxAScore) %>%
rename(protein_id = accession,
sequence = peptide,
ptm_id = SiteID,
redundant_ids = redundantAccessions,
redundant_ids = noninferableProteins,
flanking_sequence = flankingSequence) %>%
# group at peptide level to calculate peptide score, confident score
group_by(protein_id, sequence, ptm_id, flanking_sequence, redundant_ids) %>%
Expand Down Expand Up @@ -331,21 +333,21 @@ map_flanking_sequence <- function (msnid, fasta, radius=7L, collapse="|") {
}

x <- psms(msnid) %>%
dplyr::select(accession, Peptide, SiteLoc) %>%
select(accession, SiteLoc) %>%
distinct()

x <- fasta %>%
as.data.frame() %>%
rownames_to_column("accession") %>%
dplyr::mutate(accession = sub("^(.P_\\d+\\.\\d+)?\\s.*", "\\1", accession)) %>%
dplyr::rename(ProtSeq = x) %>%
dplyr::mutate(ProtSeqWidth = nchar(ProtSeq)) %>%
inner_join(x, .)
mutate(accession = sub("^(.P_\\d+\\.\\d+)?\\s.*", "\\1", accession)) %>%
rename(ProtSeq = x) %>%
mutate(ProtSeqWidth = nchar(ProtSeq)) %>%
inner_join(x, ., by="accession")


f <- function(ProtSeq_i, SiteLoc_i) {
flankingSequences <- c()
for (k in unlist(SiteLoc_i)) {
for (k in unlist(SiteLoc_i[[1]])) {

site_left <- substr(ProtSeq_i, max(k-radius,1), k-1)

Expand Down Expand Up @@ -373,8 +375,10 @@ map_flanking_sequence <- function (msnid, fasta, radius=7L, collapse="|") {
x$flankingSequence <- map2(x$ProtSeq, x$SiteLoc, f)
x$flankingSequence <- as.character(x$flankingSequence)

msnid@psms <- psms(msnid) %>% mutate(flankingSequence=NULL) %>%
left_join(x) %>% data.table()
msnid@psms <- psms(msnid) %>%
mutate(flankingSequence=NULL) %>%
left_join(x, by=c("accession", "SiteLoc")) %>%
data.table()

return(msnid)
}
Expand All @@ -385,11 +389,11 @@ map_flanking_sequence <- function (msnid, fasta, radius=7L, collapse="|") {
#' @rdname motrpac_bic_output
assess_redundant_protein_matches <- function(msnid, collapse="|") {
msnid@psms <- psms(msnid) %>%
dplyr::select(accession, peptide) %>%
select(accession, peptide) %>%
distinct() %>%
group_by(peptide) %>%
summarize(redundantAccessions = paste(accession, collapse=collapse)) %>%
left_join(psms(msnid), .) %>%
left_join(psms(msnid), ., by="peptide") %>%
data.table()
return(msnid)
}
Expand All @@ -400,7 +404,7 @@ assess_noninferable_proteins <- function(msnid, collapse="|") {

# assign each accession to its peptide set
x <- psms(msnid) %>%
dplyr::select(accession, Peptide) %>%
select(accession, Peptide) %>%
group_by(accession) %>%
arrange(Peptide) %>%
summarize(peptide_set = paste(Peptide, collapse=collapse))
Expand All @@ -409,11 +413,13 @@ assess_noninferable_proteins <- function(msnid, collapse="|") {
x <- x %>%
group_by(peptide_set) %>%
summarize(noninferableProteins = paste(accession, collapse=collapse)) %>%
left_join(x) %>%
dplyr::select(-peptide_set)
left_join(x,by="peptide_set") %>%
select(-peptide_set)

msnid@psms <- psms(msnid) %>% mutate(noninferableProteins=NULL) %>%
left_join(x) %>% data.table()
msnid@psms <- psms(msnid) %>%
mutate(noninferableProteins=NULL) %>%
left_join(x, by="accession") %>%
data.table()

return(msnid)
}
Expand Down Expand Up @@ -459,16 +465,21 @@ compute_protein_coverage <- function(msnid, path_to_FASTA) {
return(percentAACoverage)
}

ids <- psms(msnid) %>% dplyr::select(Protein, pepSeq) %>%
ids <- psms(msnid) %>%
select(Protein, pepSeq) %>%
distinct()

proteins <- ids %>% dplyr::select(Protein) %>% distinct()
proteins <- ids %>%
select(Protein) %>%
distinct()

proteins$percentAACoverage <- map(proteins$Protein, get_coverage_for_single_protein, ids, fasta)
proteins$percentAACoverage <- as.numeric(proteins$percentAACoverage)

msnid@psms <- psms(msnid) %>% mutate(percentAACoverage=NULL) %>%
left_join(proteins) %>% data.table()
msnid@psms <- psms(msnid) %>%
mutate(percentAACoverage=NULL) %>%
left_join(proteins) %>%
data.table()

return(msnid)
}
Expand Down

0 comments on commit fb6a3bf

Please sign in to comment.