Skip to content

Commit

Permalink
small edits and man page fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
npcooley committed Aug 9, 2024
1 parent 3d61970 commit ead8b13
Show file tree
Hide file tree
Showing 6 changed files with 65 additions and 53 deletions.
10 changes: 5 additions & 5 deletions R/ExpandDiagonal.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
# contact: npc19@pitt.edu / npcooley@gmail.com

ExpandDiagonal <- function(SynExtendObject,
DataBase,
InheritConfidence = TRUE,
DataBase01,
InheritConfidence = FALSE,
GapTolerance = 100L,
DropSingletons = FALSE,
UserConfidence = list("PID" = 0.3),
Expand All @@ -20,7 +20,7 @@ ExpandDiagonal <- function(SynExtendObject,
stop ("SynExtendObject must be an object of class 'PairSummaries'.")
}
# check DBPATH first
if (is.character(DataBase)) {
if (is.character(DataBase01)) {
if (!requireNamespace(package = "RSQLite",
quietly = TRUE)) {
stop("Package 'RSQLite' must be installed.")
Expand All @@ -29,10 +29,10 @@ ExpandDiagonal <- function(SynExtendObject,
print("Eventually character vector access to DECIPHER DBs will be deprecated.")
require(RSQLite, quietly = TRUE)
}
dbConn <- dbConnect(dbDriver("SQLite"), DataBase)
dbConn <- dbConnect(dbDriver("SQLite"), DataBase01)
on.exit(dbDisconnect(dbConn))
} else {
dbConn <- DataBase
dbConn <- DataBase01
if (!dbIsValid(dbConn)) {
stop("The connection has expired.")
}
Expand Down
6 changes: 4 additions & 2 deletions R/SummarizePairs.R
Original file line number Diff line number Diff line change
Expand Up @@ -421,6 +421,7 @@ SummarizePairs <- function(SynExtendObject,
DataPool[[m1]]$len <- width(DataPool[[m1]]$DNA)
DataPool[[m1]]$mod <- DataPool[[m1]]$len %% 3L == 0
DataPool[[m1]]$code <- GeneCalls[[m1]]$Coding
DataPool[[m1]]$cds <- lengths(GeneCalls[[m1]]$Range)
# DBQUERY <- paste("select len, mod, code, cds from NTs where identifier is",
# ObjectIDs[m1])
# DBOUT <- dbGetQuery(conn = dbConn,
Expand Down Expand Up @@ -459,6 +460,7 @@ SummarizePairs <- function(SynExtendObject,
DataPool[[m2]]$len <- width(DataPool[[m2]]$DNA)
DataPool[[m2]]$mod <- DataPool[[m2]]$len %% 3L == 0
DataPool[[m2]]$code <- GeneCalls[[m2]]$Coding
DataPool[[m2]]$cds <- lengths(GeneCalls[[m2]]$Range)
# DBQUERY <- paste("select len, mod, code, cds from NTs where identifier is",
# ObjectIDs[m2])
# DBOUT <- dbGetQuery(conn = dbConn,
Expand Down Expand Up @@ -486,7 +488,7 @@ SummarizePairs <- function(SynExtendObject,
QNTCount <- DataPool[[m1]]$len
QMod <- DataPool[[m1]]$mod
QCode <- DataPool[[m1]]$code
# QCDSCount <- DataPool[[m1]]$cds
QCDSCount <- DataPool[[m1]]$cds
QueryStruct <- DataPool[[m1]]$struct
} else {
# do something else?
Expand All @@ -497,7 +499,7 @@ SummarizePairs <- function(SynExtendObject,
SNTCount <- DataPool[[m2]]$len
SMod <- DataPool[[m2]]$mod
SCode <- DataPool[[m2]]$code
# SCDSCount <- DataPool[[m2]]$cds
SCDSCount <- DataPool[[m2]]$cds
SubjectStruct <- DataPool[[m2]]$struct

# align everyone as AAs who can be, i.e. modulo of 3, is coding, etc
Expand Down
41 changes: 23 additions & 18 deletions man/ExpandDiagonal.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,8 @@ Attempt to expand blocks of paired features in a \code{PairSummaries} object.
}
\usage{
ExpandDiagonal(SynExtendObject,
FeatureSeqs,
DataBase,
InheritConfidence = TRUE,
DataBase01,
InheritConfidence = FALSE,
GapTolerance = 100L,
DropSingletons = FALSE,
UserConfidence = list("PID" = 0.3),
Expand All @@ -20,10 +19,7 @@ ExpandDiagonal(SynExtendObject,
\item{SynExtendObject}{
An object of class \code{PairSummaries}.
}
\item{FeatureSeqs}{
An object of class \code{FeatureSeqs}.
}
\item{DataBase}{
\item{DataBase01}{
A character string pointing to a SQLite database, or a connection to a \code{DECIPHER} database.
}
\item{InheritConfidence}{
Expand All @@ -36,7 +32,7 @@ Integer value indicating the \code{diff} between feature IDs that can be tolerat
Ignore solo pairs when planning expansion routes. Set to \code{FALSE} by default.
}
\item{UserConfidence}{
A named list of length 1 where the name identifies a column of the \code{PairSummaries} object, and the value identifies a user confidence. Every k-means cluster with a center value of the column value selected greater than the confidence is retained.
A named list of length 1 where the name identifies a column of the \code{PairSummaries} object, and the value identifies a user confidence. To be retained, a pair evaluated for expansion must be above all user specified confidences.
}
\item{Verbose}{
Logical indicating whether or not to display a progress bar and print the time difference upon completion.
Expand All @@ -56,18 +52,27 @@ Nicholas Cooley \email{npc19@pitt.edu}
\code{\link{PairSummaries}}, \code{\link{NucleotideOverlap}}, \code{\link{SubSetPairs}}, \code{\link{FindSynteny}}
}
\examples{
library(RSQLite)
DBPATH <- system.file("extdata",
"Endosymbionts_v02.sqlite",
package = "SynExtend")

data("Endosymbionts_LinkedFeatures", package = "SynExtend")
Endosymbiont_Seqs <- PrepareSeqs(SynExtendObject = Endosymbionts_LinkedFeatures,
DataBase = DBPATH,
Verbose = TRUE)
tmp <- tempfile()
system(command = paste("cp",
DBPATH,
tmp))
DBCONN <- dbConnect(SQLite(), tmp)

data("Endosymbionts_Pairs02", package = "SynExtend")
Pairs03 <- ExpandDiagonal(SynExtendObject = Endosymbionts_Pairs02,
DataBase = DBPATH,
FeatureSeqs = Endosymbiont_Seqs,
Verbose = TRUE)
data("Endosymbionts_LinkedFeatures", package = "SynExtend")
PrepareSeqs(SynExtendObject = Endosymbionts_LinkedFeatures,
DataBase01 = DBCONN,
Verbose = TRUE)
SummarizedPairs <- SummarizePairs(SynExtendObject = Endosymbionts_LinkedFeatures,
DataBase01 = DBCONN,
Verbose = TRUE)
ExpandedPairs <- ExpandDiagonal(SynExtendObject = SummarizedPairs,
DataBase01 = DBCONN,
Verbose = TRUE)
dbDisconnect(DBCONN)
unlink(tmp)

}
23 changes: 10 additions & 13 deletions man/PrepareSeqs.Rd
Original file line number Diff line number Diff line change
@@ -1,44 +1,40 @@
\name{PrepareSeqs}
\alias{PrepareSeqs}
\title{
Return gene sequences.
Add feature sequences to DECIPHER databases.
}
\description{
Given a \code{SynExtend} object with a \code{GeneCalls} attribute, and a \code{DECIPHER} database, return all gene sequences and their translations.
Given a \code{SynExtend} object with a \code{GeneCalls} attribute, and a \code{DECIPHER} database, add sequence tables named 'AAs' and 'NTs' to the database. The new tables contain all translatable sequences indicated by the genecalls, and all nucleotide feature sequences.
}
\usage{
PrepareSeqs(SynExtendObject,
DataBase,
DataBase01,
DefaultTranslationTable = "11",
Identifiers = NULL,
Storage = 1,
Verbose = FALSE)
}
\arguments{
\item{SynExtendObject}{
An object of class \code{PairSummaries} or of \code{LinkedPairs}. Object must have a \code{GeneCalls} attribute.
}
\item{DataBase}{
\item{DataBase01}{
A character string pointing to a SQLite database, or a connection to a \code{DECIPHER} database.
}
\item{DefaultTranslationTable}{
A character vector of length 1 identifying the translation table to use if one is not supplied in the \code{GeneCalls} attribute.
}
\item{Identifiers}{
By default \code{NULL}, but can be used to supply a vector of character identifiers for returning a subset of prepared sequences.
}
\item{Storage}{
A soft memory limit for how much space to allow when building the resulting object. Translated to Gb.
}
\item{Verbose}{
Logical indicating whether or not to display a progress bar and print the time difference upon completion.
}
}
\details{
\code{PrepareSeqs} returns the sequences of genes and their translations where appropriate.
\code{PrepareSeqs} adds two tables to a DECIPHER database: one named 'AAs' that contains all translatable features, i.e. features that are designated as coding and have a coding length divisible by 3, and another named 'NTs' that contains all features.
}
\value{
An object of class \code{FeatureSeqs}.
An integer count of the number of feature sets added to the DECIPHER database.
}
\author{
Nicholas Cooley \email{npc19@pitt.edu}
Expand All @@ -53,7 +49,8 @@ DBPATH <- system.file("extdata",
package = "SynExtend")

data("Endosymbionts_LinkedFeatures", package = "SynExtend")
CurrentSeqs <- PrepareSeqs(SynExtendObject = Endosymbionts_LinkedFeatures,
DataBase = DBPATH,
Verbose = TRUE)
# this will add seqs to the DB
# PrepareSeqs(SynExtendObject = Endosymbionts_LinkedFeatures,
# DataBase01 = DBPATH,
# Verbose = TRUE)
}
36 changes: 22 additions & 14 deletions man/SummarizePairs.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,11 @@
Provide summaries of hypothetical orthologs.
}
\description{
Given the correct set of \code{SynExtend} objects and a \code{DECIPHER} database, return a data.frame of summarized genomic feature pairs. \code{SummarizePairs} will collect all the linked genomic features in the supplied \code{\link{LinkedPairs-class}} object and return descriptions of the alignments of those features.
Given a \code{LinkedPairs} object and a \code{DECIPHER} database, return a data.frame of summarized genomic feature pairs. \code{SummarizePairs} will collect all the linked genomic features in the supplied \code{\link{LinkedPairs-class}} object and return descriptions of the alignments of those features.
}
\usage{
SummarizePairs(SynExtendObject,
FeatureSeqs,
DataBase,
DataBase01,
AlignmentFun = "AlignProfiles",
RetainAnchors = FALSE,
DefaultTranslationTable = "11",
Expand All @@ -18,16 +17,14 @@ SummarizePairs(SynExtendObject,
Verbose = FALSE,
ShowPlot = FALSE,
Processors = 1,
Storage = 2,
...)
}
\arguments{
\item{SynExtendObject}{
An object of class \code{LinkedPairs-class}.
}
\item{FeatureSeqs}{
An object of class \code{FeatureSeqs}.
}
\item{DataBase}{
\item{DataBase01}{
A character string pointing to a SQLite database, or a connection to a \code{DECIPHER} database.
}
\item{AlignmentFun}{
Expand All @@ -43,16 +40,19 @@ A character vector of length 1 identifying the translation table to use if one i
An integer specifying what Kmer size to collect Kmer distance between sequences at.
}
\item{IgnoreDefaultStringSet}{
A soft memory limit for how much space to allow when building the resulting object. Translated to Gb.
Translate all sequences in nucleotide space.
}
\item{Verbose}{
Logical indicating whether or not to display a progress bar and print the time difference upon completion.
}
\item{ShowPlot}{
Logical indicating whether or not to provide a plot of features collected by the function.
Logical indicating whether or not to provide a plot of features collected by the function. Currently not implemented.
}
\item{Processors}{
An integer value indicating how many processors to supply to \code{\link{AlignPairs}}.
}
\item{Storage}{
A soft memory limit, in Gb, for how much sequence data from the database to retain in memory while running.
}
\item{...}{
Additional arguments to pass to interior functions. Currently not implemented.
Expand All @@ -72,16 +72,24 @@ Nicholas Cooley \email{npc19@pitt.edu}
\code{\link{PrepareSeqs}}, \code{\link{NucleotideOverlap}}, \code{\link{FindSynteny}}, \code{\link{LinkedPairs-class}}
}
\examples{
library(RSQLite)
DBPATH <- system.file("extdata",
"Endosymbionts_v02.sqlite",
package = "SynExtend")
tmp <- tempfile()
system(command = paste("cp",
DBPATH,
tmp))
DBCONN <- dbConnect(SQLite(), tmp)

data("Endosymbionts_LinkedFeatures", package = "SynExtend")
Endosymbiont_Seqs <- PrepareSeqs(SynExtendObject = Endosymbionts_LinkedFeatures,
DataBase = DBPATH,
Verbose = TRUE)
PrepareSeqs(SynExtendObject = Endosymbionts_LinkedFeatures,
DataBase01 = DBCONN,
Verbose = TRUE)
SummarizedPairs <- SummarizePairs(SynExtendObject = Endosymbionts_LinkedFeatures,
FeatureSeqs = Endosymbiont_Seqs,
DataBase = DBPATH)
DataBase01 = DBCONN,
Verbose = TRUE)
dbDisconnect(DBCONN)
unlink(tmp)

}
2 changes: 1 addition & 1 deletion man/SuperTree.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ data("SuperTreeEx", package="SynExtend")
# Notice that the labels of the tree are in #_#_# format
# See the man page for SuperTreeEx for more info
labs <- labels(exData[[1]])
if(interative()) print(labs)
if(interactive()) print(labs)

# The first number corresponds to the species,
# so we need to trim the rest in each leaf label
Expand Down

0 comments on commit ead8b13

Please sign in to comment.