Skip to content

Commit

Permalink
Merge pull request #36 from mdnestor/dev
Browse files (browse the repository at this point in the history)
Added is_contaminant column to MoTrPAC tables
  • Loading branch information
vladpetyuk authored Mar 24, 2021
2 parents 8ed0e8d + 3711151 commit e50afd2
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 11 deletions.
4 changes: 4 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,11 @@ Imports:
data.table,
dplyr,
plyr,
IRanges,
MSnbase,
odbc,
purrr,
tibble,
tidyr,
tidyselect,
tools,
Expand Down
30 changes: 19 additions & 11 deletions R/motrpac_bic_funtions.R
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ make_rii_peptide_gl <- function(msnid, masic_data, fractions, samples,
## Create RII peptide table
rii_peptide <- crosstab %>%
select(Specie) %>%
mutate(protein_id = sub("(^.*\\.\\d+)@.*", "\\1", Specie),
mutate(protein_id = sub("(^.*)@(.*)", "\\1", Specie),
sequence = sub("(^.*)@(.*)", "\\2", Specie),
organism_name = org_name) %>%
mutate(REFSEQ = sub("(^.*)\\.\\d+", "\\1", protein_id))
Expand All @@ -95,19 +95,20 @@ make_rii_peptide_gl <- function(msnid, masic_data, fractions, samples,
inner_join(., conv)

rii_peptide <- rii_peptide %>%
inner_join(conv) %>%
left_join(conv) %>%
rename(gene_symbol = SYMBOL,
entrez_id = ENTREZID) %>%
select(-REFSEQ)

## Additional info from MS/MS
ids <- psms(msnid) %>%
select(accession, peptide, redundantAccessions, MSGFDB_SpecEValue) %>%
select(accession, peptide, noninferableProteins, MSGFDB_SpecEValue) %>%
rename(protein_id = accession,
sequence = peptide,
redundant_ids = redundantAccessions) %>%
redundant_ids = noninferableProteins) %>%
group_by(protein_id, sequence, redundant_ids) %>%
summarize(peptide_score = min(MSGFDB_SpecEValue))
summarize(peptide_score = min(MSGFDB_SpecEValue)) %>%
mutate(is_contaminant = grepl("Contaminant", protein_id))

rii_peptide <- inner_join(rii_peptide, ids)

Expand All @@ -128,6 +129,10 @@ make_results_ratio_gl <- function(msnid, masic_data, fractions, samples,
crosstab <- create_crosstab(msnid, masic_data, aggregation_level, fractions,
samples, references)

# testing purposes
#crosstab <- rbind(crosstab, rnorm(10))
#rownames(crosstab)[nrow(crosstab)] <- "Contaminant_TRYP_PIG"

crosstab <- as.data.frame(crosstab) %>%
rownames_to_column("protein_id")

Expand All @@ -144,7 +149,7 @@ make_results_ratio_gl <- function(msnid, masic_data, fractions, samples,
inner_join(., conv)

results_ratio <- results_ratio %>%
inner_join(conv) %>%
left_join(conv) %>%
rename(gene_symbol = SYMBOL,
entrez_id = ENTREZID) %>%
select(-REFSEQ)
Expand All @@ -162,7 +167,8 @@ make_results_ratio_gl <- function(msnid, masic_data, fractions, samples,
summarize(peptide_score = min(MSGFDB_SpecEValue)) %>%
group_by(protein_id, redundant_ids, percent_coverage) %>%
summarize(protein_score = min(peptide_score),
num_peptides = n())
num_peptides = n()) %>%
mutate(is_contaminant = grepl("Contaminant", protein_id))

results_ratio <- inner_join(results_ratio, ids)

Expand Down Expand Up @@ -214,7 +220,7 @@ make_rii_peptide_ph <- function(msnid, masic_data, fractions, samples, reference
conv <- fetch_conversion_table(org_name, from = "REFSEQ", "ENTREZID") %>%
inner_join(., conv)

rii_peptide <- inner_join(rii_peptide, conv) %>%
rii_peptide <- left_join(rii_peptide, conv) %>%
rename(gene_symbol = SYMBOL,
entrez_id = ENTREZID) %>%
select(-REFSEQ)
Expand All @@ -233,7 +239,8 @@ make_rii_peptide_ph <- function(msnid, masic_data, fractions, samples, reference
summarize(peptide_score = min(MSGFDB_SpecEValue),
confident_score = max(maxAScore)) %>%
mutate(confident_site = case_when(confident_score >= 17 ~ TRUE,
confident_score < 17 ~ FALSE))
confident_score < 17 ~ FALSE),
is_contaminant = grepl("Contaminant", protein_id))

rii_peptide <- inner_join(rii_peptide, ids) %>%
mutate(ptm_id = gsub("-", sep, ptm_id))
Expand Down Expand Up @@ -270,7 +277,7 @@ make_results_ratio_ph <- function(msnid, masic_data, fractions, samples,
conv <- fetch_conversion_table(org_name, from = "REFSEQ", "ENTREZID") %>%
inner_join(., conv)

results_ratio <- inner_join(results_ratio, conv) %>%
results_ratio <- left_join(results_ratio, conv) %>%
rename(gene_symbol = SYMBOL,
entrez_id = ENTREZID) %>%
select(-REFSEQ)
Expand All @@ -294,7 +301,8 @@ make_results_ratio_ph <- function(msnid, masic_data, fractions, samples,
summarize(ptm_score = min(peptide_score),
confident_score = max(confident_score)) %>%
mutate(confident_site = case_when(confident_score >= 17 ~ TRUE,
confident_score < 17 ~ FALSE))
confident_score < 17 ~ FALSE),
is_contaminant = grepl("Contaminant", protein_id))

results_ratio <- inner_join(results_ratio, ids) %>%
mutate(ptm_id = gsub("-", sep, ptm_id))
Expand Down

0 comments on commit e50afd2

Please sign in to comment.