-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathdscquery.Rd
225 lines (200 loc) · 9.96 KB
/
dscquery.Rd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/dscquery.R
\name{dscquery}
\alias{dscquery}
\title{R interface for querying DSC output.}
\usage{
dscquery(
dsc.outdir,
targets = NULL,
module.output.all = NULL,
module.output.files = NULL,
conditions = NULL,
groups = NULL,
dsc.outfile = NULL,
return.type = c("auto", "data.frame", "list"),
ignore.missing.files = FALSE,
exec = "dsc-query",
verbose = TRUE
)
}
\arguments{
\item{dsc.outdir}{Directory where the DSC output is stored.}
\item{targets}{Query targets specified as a character vector; for
example, \code{targets = c("simulate.n","analyze","score.error")}.
A query target may be a module, a module group, a module parameter,
or a module output. Targets that are not assigned are set to
\code{NA}. This argument specifies the \code{--target} flag in the
\code{dsc-query} call. At least one target must be chosen;
\code{targets} cannot be \code{NULL} or empty. These targets will
be the names of the columns in the data frame if a data frame is
returned, or the names of the list elements if a list is returned.
A special target is \code{<module>.DSC_TIME}, which extracts the
runtime of the module.}
\item{module.output.all}{Character vector specifying names of
modules or module groups in the DSC. For each specified module or
module group, an additional list element is provided containing the
full module outputs, as well as information recorded by DSC such as
the runtime and the replicate number (see the \code{"DSC_DEBUG"}
element). This option can be useful for testing or debugging. Note
that any module or module group included in
\code{module.output.all} must also be included in \code{targets}.}
\item{module.output.files}{Character vector specifying names of
modules or module groups in the DSC. For each specified module or
module group, an additional data frame column (or list element)
giving the name of the DSC output file is provided. This can be
useful if you want to manually load the stored results (e.g., for
testing or debugging). For more details on DSC output files, how to
interpret the file paths, and how to import the contents of these
files into R, see \code{\link{dscread}}. Note that any module or
module group included in \code{module.output.files} must also be
included in \code{targets}.}
\item{conditions}{Conditions used to filter DSC pipeline results;
rows in which one or more of the conditions evaluate to
\code{FALSE} or \code{NA} are removed from the output (dropping
rows for which a condition evaluates to \code{NA} follows the
convention used by \code{\link{which}}). When \code{conditions = NULL}, no additional
filtering of DSC pipelines is performed. Although results can
always be filtered \emph{post hoc}, using \code{conditions} to
filter can significantly speed up queries when the DSC outputs are
very large, as this will filter results, whenever possible,
\emph{before} they are loaded into R. Query conditions are
specified as R expressions, in which target names are written as
\code{$(...)}; for example, to request only results in which the
value of parameter \code{sigma} in module \code{simulate} is
greater than or equal to \code{0.1}, set \code{conditions =
"$(simulate.sigma) >= 0.1"} (see below for additional
examples). This input argument specifies the \code{--condition}
flag in the call to \code{dsc-query}. All targets used in the
conditions must also be included in \code{targets}.}
\item{groups}{Defines module groups. This argument specifies the
\code{--groups} flag in the call to \code{dsc-query}. For example,
\code{groups = c("method: mean median", "score: abs_err sqrt_err")}
will define two module groups, \code{method} and \code{score}.}
\item{dsc.outfile}{This optional input argument can be used to
provide a previously generated output from the \code{dsc-query}
program, in which case it must be the pathname of the output
file. This input is mainly intended to be used by developers and
expert users for testing or to reproduce previous queries since the
\code{dsc-query} output file must exactly agree in the query
arguments, otherwise unexpected errors could occur.}
\item{return.type}{If \code{return.type = "data.frame"}, the DSC
outputs are returned in a data frame; if \code{return.type =
"list"}, the DSC outputs are returned in a list. If
\code{return.type = "auto"}, a
list or data frame is returned depending on which data structure is
most appropriate for the DSC outputs. See "Value" for more
information about the different return types, and the benefits (and
limitations) of each. Note that \code{return.type = "data.frame"}
cannot be used when one or more modules or module groups are named
in \code{module.output.all}.}
\item{ignore.missing.files}{If \code{ignore.missing.files = TRUE},
all targets corresponding to DSC output files that cannot be found,
or cannot be read (e.g., because they are corrupted), will be
treated as if the targets are not assigned (\code{NA}). If
\code{ignore.missing.files = FALSE}, \code{dscquery} will generate
an error whenever a file cannot be found or read.}
\item{exec}{The command or pathname of the \code{dsc-query}
executable.}
\item{verbose}{If \code{verbose = TRUE}, print progress of DSC
query command to the console.}
}
\value{
A list or data frame containing the result of the DSC
query.
When \code{return.type = "data.frame"}, the output is a data frame.
When possible, DSC outputs are extracted into the columns of the
data frame; when this is not possible (e.g., for more complex
outputs such as matrices), file names containing the DSC outputs
are provided instead. A data frame is most convenient when the
outputs are not complex.
When \code{return.type = "list"}, the output is a list, with list
elements corresponding to the query targets. Each top-level list
element should have the same length.
When \code{return.type = "auto"}, DSC outputs are extracted into
the columns of the data frame unless one or more outputs are large
or complex objects, in which case the return value is a list.
Note that a list can sometimes be converted to a data frame using
\code{\link{as.data.frame}}, or converted to a "tibble" using the
\code{\link[tibble]{as_tibble}} function from the tibble package.
All targets specified by the \code{targets} argument, except for
targets that are module names, should have columns (or list
elements) of the same name in the output.
Whenever a target of the form "x.y" is requested, where "x" is a
module group and "y" is a module parameter or output, an additional
output for the module group is automatically included. Additional
outputs giving file names of the DSC results files are included for
all targets that are modules or module groups.
When targets are unassigned, these are stored as missing values
(\code{NA}).
}
\description{
This is an R interface to the \code{dsc-query} program
for conveniently extracting and exploring DSC results within the R
environment. For additional information, run
\code{system("dsc-query --help")}.
}
\details{
A call to dscquery cannot include targets that involve
both a module, and a module group containing that module. For
example, setting \code{targets = c("mean.est","analyze")} will
generate an error if "mean" is a module, and it is a member of the
"analyze" module group.
This function may not work on Windows.
}
\examples{
# Retrieve the number of samples ("simulate.n") and error summary
# ("score.error") from all simulations in the "one_sample_location"
# DSC experiment.
dsc.dir <- system.file("datafiles","one_sample_location",
"dsc_result",package = "dscrutils")
dat1 <- dscquery(dsc.dir,
targets = c("simulate.n","analyze","score.error"))
# Retrieve the results only for simulations in which the "mean" module
# was run. Because this is a condition for a module name, it is
# applied before loading the full set of results into R. Therefore,
# this type of filtering step can speed up the query when there are
# many simulation results.
dat2 <- dscquery(dsc.dir,
targets = c("simulate.n","analyze","score.error"),
conditions = "$(analyze) == 'mean'")
# Return results only for simulations in which the error summary is
# greater than 0.2. This condition is applied only after loading the
# full set of results into R. Therefore, this type of condition will not
# reduce the query runtime.
dat3 <- dscquery(dsc.dir,
targets = c("simulate.n","analyze","score.error"),
conditions = "$(score.error) > 0.2")
# Retrieve the DSC results only for simulations in which the "median"
# module was run, and for which the error summary is greater than
# 0.2. The conditions in this case are applied both before and after
# loading results into R.
dat4 <- dscquery(dsc.dir,
targets = c("simulate.n","analyze","score.error"),
conditions = c("$(score.error) > 0.2",
"$(analyze) == 'median'"))
# Retrieve some results from the "ash" DSC experiment. In this
# example, the beta estimates are vectors, so the results are
# extracted into a list by default.
dsc.dir2 <- system.file("datafiles","ash","dsc_result",
package = "dscrutils")
dat5 <-
dscquery(dsc.dir2,
targets = c("simulate.nsamp","simulate.g","shrink.mixcompdist",
"shrink.beta_est","shrink.pi0_est"),
conditions = "$(simulate.g)=='list(c(2/3,1/3),c(0,0),c(1,2))'")
# This is the same as the previous example, but extracts the results
# into a data frame. Since the vectors cannot be stored in a data frame,
# the names of the files storing the vectors are returned instead.
dat6 <-
dscquery(dsc.dir2,
targets = c("simulate.nsamp","simulate.g","shrink.mixcompdist",
"shrink.beta_est","shrink.pi0_est"),
conditions = "$(simulate.g)=='list(c(2/3,1/3),c(0,0),c(1,2))'",
return.type = "data.frame")
# See also example("dscread").
}
\seealso{
\code{\link{dscread}}
}