From ead8b13d58b6b02e931296c795e82f715f4bdc10 Mon Sep 17 00:00:00 2001 From: npcooley Date: Fri, 9 Aug 2024 11:07:01 -0400 Subject: [PATCH] small edits and man page fixes --- R/ExpandDiagonal.R | 10 +++++----- R/SummarizePairs.R | 6 ++++-- man/ExpandDiagonal.Rd | 41 +++++++++++++++++++++++------------------ man/PrepareSeqs.Rd | 23 ++++++++++------------- man/SummarizePairs.Rd | 36 ++++++++++++++++++++++-------------- man/SuperTree.Rd | 2 +- 6 files changed, 65 insertions(+), 53 deletions(-) diff --git a/R/ExpandDiagonal.R b/R/ExpandDiagonal.R index 8aa6afe..fcbc068 100644 --- a/R/ExpandDiagonal.R +++ b/R/ExpandDiagonal.R @@ -3,8 +3,8 @@ # contact: npc19@pitt.edu / npcooley@gmail.com ExpandDiagonal <- function(SynExtendObject, - DataBase, - InheritConfidence = TRUE, + DataBase01, + InheritConfidence = FALSE, GapTolerance = 100L, DropSingletons = FALSE, UserConfidence = list("PID" = 0.3), @@ -20,7 +20,7 @@ ExpandDiagonal <- function(SynExtendObject, stop ("SynExtendObject must be an object of class 'PairSummaries'.") } # check DBPATH first - if (is.character(DataBase)) { + if (is.character(DataBase01)) { if (!requireNamespace(package = "RSQLite", quietly = TRUE)) { stop("Package 'RSQLite' must be installed.") @@ -29,10 +29,10 @@ ExpandDiagonal <- function(SynExtendObject, print("Eventually character vector access to DECIPHER DBs will be deprecated.") require(RSQLite, quietly = TRUE) } - dbConn <- dbConnect(dbDriver("SQLite"), DataBase) + dbConn <- dbConnect(dbDriver("SQLite"), DataBase01) on.exit(dbDisconnect(dbConn)) } else { - dbConn <- DataBase + dbConn <- DataBase01 if (!dbIsValid(dbConn)) { stop("The connection has expired.") } diff --git a/R/SummarizePairs.R b/R/SummarizePairs.R index 48fb7ef..abae534 100644 --- a/R/SummarizePairs.R +++ b/R/SummarizePairs.R @@ -421,6 +421,7 @@ SummarizePairs <- function(SynExtendObject, DataPool[[m1]]$len <- width(DataPool[[m1]]$DNA) DataPool[[m1]]$mod <- DataPool[[m1]]$len %% 3L == 0 DataPool[[m1]]$code <- 
GeneCalls[[m1]]$Coding + DataPool[[m1]]$cds <- lengths(GeneCalls[[m1]]$Range) # DBQUERY <- paste("select len, mod, code, cds from NTs where identifier is", # ObjectIDs[m1]) # DBOUT <- dbGetQuery(conn = dbConn, @@ -459,6 +460,7 @@ SummarizePairs <- function(SynExtendObject, DataPool[[m2]]$len <- width(DataPool[[m2]]$DNA) DataPool[[m2]]$mod <- DataPool[[m2]]$len %% 3L == 0 DataPool[[m2]]$code <- GeneCalls[[m2]]$Coding + DataPool[[m2]]$cds <- lengths(GeneCalls[[m2]]$Range) # DBQUERY <- paste("select len, mod, code, cds from NTs where identifier is", # ObjectIDs[m2]) # DBOUT <- dbGetQuery(conn = dbConn, @@ -486,7 +488,7 @@ SummarizePairs <- function(SynExtendObject, QNTCount <- DataPool[[m1]]$len QMod <- DataPool[[m1]]$mod QCode <- DataPool[[m1]]$code - # QCDSCount <- DataPool[[m1]]$cds + QCDSCount <- DataPool[[m1]]$cds QueryStruct <- DataPool[[m1]]$struct } else { # do something else? @@ -497,7 +499,7 @@ SummarizePairs <- function(SynExtendObject, SNTCount <- DataPool[[m2]]$len SMod <- DataPool[[m2]]$mod SCode <- DataPool[[m2]]$code - # SCDSCount <- DataPool[[m2]]$cds + SCDSCount <- DataPool[[m2]]$cds SubjectStruct <- DataPool[[m2]]$struct # align everyone as AAs who can be, i.e. modulo of 3, is coding, etc diff --git a/man/ExpandDiagonal.Rd b/man/ExpandDiagonal.Rd index ce6afef..2ccf4d5 100644 --- a/man/ExpandDiagonal.Rd +++ b/man/ExpandDiagonal.Rd @@ -8,9 +8,8 @@ Attempt to expand blocks of paired features in a \code{PairSummaries} object. } \usage{ ExpandDiagonal(SynExtendObject, - FeatureSeqs, - DataBase, - InheritConfidence = TRUE, + DataBase01, + InheritConfidence = FALSE, GapTolerance = 100L, DropSingletons = FALSE, UserConfidence = list("PID" = 0.3), @@ -20,10 +19,7 @@ ExpandDiagonal(SynExtendObject, \item{SynExtendObject}{ An object of class \code{PairSummaries}. } - \item{FeatureSeqs}{ -An object of class \code{FeatureSeqs}. 
-} - \item{DataBase}{ + \item{DataBase01}{ A character string pointing to a SQLite database, or a connection to a \code{DECIPHER} database. } \item{InheritConfidence}{ @@ -36,7 +32,7 @@ Integer value indicating the \code{diff} between feature IDs that can be tolerat Ignore solo pairs when planning expansion routes. Set to \code{FALSE} by default. } \item{UserConfidence}{ -A named list of length 1 where the name identifies a column of the \code{PairSummaries} object, and the value identifies a user confidence. Every k-means cluster with a center value of the column value selected greater than the confidence is retained. +A named list of length 1 where the name identifies a column of the \code{PairSummaries} object, and the value identifies a user confidence. To be retained, a pair evaluated for expansion must be above all user specified confidences. } \item{Verbose}{ Logical indicating whether or not to display a progress bar and print the time difference upon completion. @@ -56,18 +52,27 @@ Nicholas Cooley \email{npc19@pitt.edu} \code{\link{PairSummaries}}, \code{\link{NucleotideOverlap}}, \code{link{SubSetPairs}}, \code{\link{FindSynteny}} } \examples{ +library(RSQLite) DBPATH <- system.file("extdata", "Endosymbionts_v02.sqlite", package = "SynExtend") - -data("Endosymbionts_LinkedFeatures", package = "SynExtend") -Endosymbiont_Seqs <- PrepareSeqs(SynExtendObject = Endosymbionts_LinkedFeatures, - DataBase = DBPATH, - Verbose = TRUE) +tmp <- tempfile() +system(command = paste("cp", + DBPATH, + tmp)) +DBCONN <- dbConnect(SQLite(), tmp) -data("Endosymbionts_Pairs02", package = "SynExtend") -Pairs03 <- ExpandDiagonal(SynExtendObject = Endosymbionts_Pairs02, - DataBase = DBPATH, - FeatureSeqs = Endosymbiont_Seqs, - Verbose = TRUE) +data("Endosymbionts_LinkedFeatures", package = "SynExtend") +PrepareSeqs(SynExtendObject = Endosymbionts_LinkedFeatures, + DataBase01 = DBCONN, + Verbose = TRUE) +SummarizedPairs <- SummarizePairs(SynExtendObject = 
Endosymbionts_LinkedFeatures, + DataBase01 = DBCONN, + Verbose = TRUE) +ExpandedPairs <- ExpandDiagonal(SynExtendObject = SummarizedPairs, + DataBase01 = DBCONN, + Verbose = TRUE) +dbDisconnect(DBCONN) +unlink(tmp) + } diff --git a/man/PrepareSeqs.Rd b/man/PrepareSeqs.Rd index 1eb3f24..70dc853 100644 --- a/man/PrepareSeqs.Rd +++ b/man/PrepareSeqs.Rd @@ -1,24 +1,23 @@ \name{PrepareSeqs} \alias{PrepareSeqs} \title{ -Return gene sequences. +Add feature sequences to DECIPHER databases. } \description{ -Given a \code{SynExtend} object with a \code{GeneCalls} attribute, and a \code{DECIPHER} database, return all gene sequences and their translations. +Given a \code{SynExtend} object with a \code{GeneCalls} attribute, and a \code{DECIPHER} database, add sequence tables named 'AAs' and 'NTs' to the database. The new tables contain all translatable sequences indicated by the gene calls, and all nucleotide feature sequences. } \usage{ PrepareSeqs(SynExtendObject, - DataBase, + DataBase01, DefaultTranslationTable = "11", Identifiers = NULL, - Storage = 1, Verbose = FALSE) } \arguments{ \item{SynExtendObject}{ An object of class \code{PairSummaries} or of \code{LinkedPairs}. Object must have a \code{GeneCalls} attribute. } - \item{DataBase}{ + \item{DataBase01}{ A character string pointing to a SQLite database, or a connection to a \code{DECIPHER} database. } \item{DefaultTranslationTable}{ @@ -26,19 +25,16 @@ A character vector of length 1 identifying the translation table to use if one i } \item{Identifiers}{ By default \code{NULL}, but can be used to supply a vector of character identifiers for returning a subset of prepared sequences. -} - \item{Storage}{ -A soft memory limit for how much space to allow when building the resulting object. Translated to Gb. } \item{Verbose}{ Logical indicating whether or not to display a progress bar and print the time difference upon completion. 
} } \details{ -\code{PrepareSeqs} returns the sequences of genes and their translations where appropriate. +\code{PrepareSeqs} adds two tables to a DECIPHER database: one named 'AAs' that contains all translatable features, i.e. features with a coding length divisible by 3 and designated as coding, and another named 'NTs' that contains all features. } \value{ -An object of class \code{FeatureSeqs}. +An integer count of the number of feature sets added to the DECIPHER database. } \author{ Nicholas Cooley \email{npc19@pitt.edu} @@ -53,7 +49,8 @@ DBPATH <- system.file("extdata", package = "SynExtend") data("Endosymbionts_LinkedFeatures", package = "SynExtend") -CurrentSeqs <- PrepareSeqs(SynExtendObject = Endosymbionts_LinkedFeatures, - DataBase = DBPATH, - Verbose = TRUE) +# this will add seqs to the DB +# PrepareSeqs(SynExtendObject = Endosymbionts_LinkedFeatures, +# DataBase01 = DBPATH, +# Verbose = TRUE) } diff --git a/man/SummarizePairs.Rd b/man/SummarizePairs.Rd index 94017de..49cc684 100644 --- a/man/SummarizePairs.Rd +++ b/man/SummarizePairs.Rd @@ -4,12 +4,11 @@ Provide summaries of hypothetical orthologs. } \description{ -Given the correct set of \code{SynExtend} objects and a \code{DECIPHER} database, return a data.frame of summarized genomic feature pairs. \code{SummarizePairs} will collect all the linked genomic features in the supplied \code{\link{LinkedPairs-class}} object and return descriptions of the alignments of those features. +Given a \code{LinkedPairs} object and a \code{DECIPHER} database, return a data.frame of summarized genomic feature pairs. \code{SummarizePairs} will collect all the linked genomic features in the supplied \code{\link{LinkedPairs-class}} object and return descriptions of the alignments of those features. 
} \usage{ SummarizePairs(SynExtendObject, - FeatureSeqs, - DataBase, + DataBase01, AlignmentFun = "AlignProfiles", RetainAnchors = FALSE, DefaultTranslationTable = "11", @@ -18,16 +17,14 @@ SummarizePairs(SynExtendObject, Verbose = FALSE, ShowPlot = FALSE, Processors = 1, + Storage = 2, ...) } \arguments{ \item{SynExtendObject}{ An object of class \code{LinkedPairs-class}. } - \item{FeatureSeqs}{ -An object of class \code{FeatureSeqs}. -} - \item{DataBase}{ + \item{DataBase01}{ A character string pointing to a SQLite database, or a connection to a \code{DECIPHER} database. } \item{AlignmentFun}{ @@ -43,16 +40,19 @@ A character vector of length 1 identifying the translation table to use if one i An integer specifying what Kmer size to collect Kmer distance between sequences at. } \item{IgnoreDefaultStringSet}{ -A soft memory limit for how much space to allow when building the resulting object. Translated to Gb. +Translate all sequences in nucleotide space. } \item{Verbose}{ Logical indicating whether or not to display a progress bar and print the time difference upon completion. } \item{ShowPlot}{ -Logical indicating whether or not to provide a plot of features collected by the function. +Logical indicating whether or not to provide a plot of features collected by the function. Currently not implemented. } \item{Processors}{ An integer value indicating how many processors to supply to \code{\link{AlignPairs}}. +} + \item{Storage}{ +A soft memory limit for how much sequence data from the database to retain in memory while running. In Gb. } \item{...}{ Additional arguments to pass to interior functions. Currently not implemented. 
@@ -72,16 +72,24 @@ Nicholas Cooley \email{npc19@pitt.edu} \code{\link{PrepareSeqs}}, \code{\link{NucleotideOverlap}}, \code{\link{FindSynteny}}, \code{\link{LinkedPairs-class}} } \examples{ +library(RSQLite) DBPATH <- system.file("extdata", "Endosymbionts_v02.sqlite", package = "SynExtend") +tmp <- tempfile() +system(command = paste("cp", + DBPATH, + tmp)) +DBCONN <- dbConnect(SQLite(), tmp) data("Endosymbionts_LinkedFeatures", package = "SynExtend") -Endosymbiont_Seqs <- PrepareSeqs(SynExtendObject = Endosymbionts_LinkedFeatures, - DataBase = DBPATH, - Verbose = TRUE) +PrepareSeqs(SynExtendObject = Endosymbionts_LinkedFeatures, + DataBase01 = DBCONN, + Verbose = TRUE) SummarizedPairs <- SummarizePairs(SynExtendObject = Endosymbionts_LinkedFeatures, - FeatureSeqs = Endosymbiont_Seqs, - DataBase = DBPATH) + DataBase01 = DBCONN, + Verbose = TRUE) +dbDisconnect(DBCONN) +unlink(tmp) } diff --git a/man/SuperTree.Rd b/man/SuperTree.Rd index 0e7998d..db14c44 100644 --- a/man/SuperTree.Rd +++ b/man/SuperTree.Rd @@ -70,7 +70,7 @@ data("SuperTreeEx", package="SynExtend") # Notice that the labels of the tree are in #_#_# format # See the man page for SuperTreeEx for more info labs <- labels(exData[[1]]) -if(interative()) print(labs) +if(interactive()) print(labs) # The first number corresponds to the species, # so we need to trim the rest in each leaf label