Skip to content

Commit

Permalink
* Removed registry in favor of using R Studio auto-complete.
Browse files Browse the repository at this point in the history
* Outlier detector are now in class DSOutlier class.
* We use now roxygen2 for man pages.
* Abstract classes have now constructors.
  • Loading branch information
mhahsler committed Dec 16, 2021
1 parent 608edad commit fb73fee
Show file tree
Hide file tree
Showing 122 changed files with 6,447 additions and 3,042 deletions.
2 changes: 2 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,5 @@ URL: https://github.com/mhahsler/stream
BugReports: https://github.com/mhahsler/stream/issues
LinkingTo: Rcpp, BH
License: GPL-3
RoxygenNote: 7.1.2
Roxygen: list(markdown = TRUE)
25 changes: 18 additions & 7 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,14 @@ importFrom(clusterGeneration, "genPositiveDefMat")
importFrom(clue, "as.cl_hard_partition", "cl_agreement", "cl_ensemble")

export(
# DSC ... Data Stream Clusterer
# Data Stream Tasks
DST,

# Data Stream Clusterer
DSC,
DSC_R,
DSC_Micro,
DSC_Macro,
DSC_SinglePass,

DSC_BIRCH,
DSC_DStream,
Expand All @@ -53,10 +55,10 @@ export(
EvalCallback,
DefaultEvalCallback,

# DSOutlier ... Data Stream Outlier Detector
# Data Stream Outlier Detector
DSOutlier,

# DSD ... Data Stream Data
# Data Stream Data
DSD,
DSD_R,

Expand All @@ -75,12 +77,14 @@ export(
DSD_BarsAndGaussians,
DSD_Benchmark,

# DSO ... Data Stream Operator
# Data Stream Operator
DSO,
DSO_Window,
DSO_Sample,

# MGCs
# Moving cluster data generators
DSD_MG,
MGC,
MGC_Linear,
MGC_Function,
MGC_Random,
Expand All @@ -89,6 +93,12 @@ export(
MGC_Shape_Gaussian,
MGC_Shape_Block,

# Frequent pattern mining
DSFP,

# Classification
DSClassify,

# functions
# DSD
get_points,
Expand Down Expand Up @@ -136,6 +146,7 @@ export(

### DSOutlier
recheck_outlier,
clean_outliers,
get_outlier_positions,
noutliers,

Expand Down Expand Up @@ -200,7 +211,6 @@ S3method(get_assignment, DSC)
S3method(get_assignment, DSC_DBSTREAM)
S3method(get_assignment, DSC_DStream)
S3method(get_assignment, DSC_TwoStage)
S3method(get_assignment, DSC_SinglePass)

S3method(get_centers, DSC_Macro)
S3method(get_centers, DSC_Micro)
Expand All @@ -217,6 +227,7 @@ S3method(microToMacro, DSC_DBSTREAM)
S3method(microToMacro, DSC_DStream)
S3method(microToMacro, DSC_TwoStage)

S3method(clean_outliers, DSOutlier)
S3method(recheck_outlier, DSOutlier)
S3method(get_outlier_positions, DSOutlier)
S3method(noutliers, DSOutlier)
Expand Down
3 changes: 2 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
## Changes
* Removed registry in favor of using R Studio auto-complete.
* Outlier detectors are now in class DSOutlier.

* We now use roxygen2 for man pages.
* Abstract classes now have constructors.

## Bug Fixes
* Fixed typo in BIRCH interface: treshold -> threshold (by dinarior)
Expand Down
20 changes: 20 additions & 0 deletions R/AAA.R
Original file line number Diff line number Diff line change
Expand Up @@ -49,4 +49,24 @@
# Report whether package `pkg` is installed. The package's row is looked up
# in the matrix returned by utils::installed.packages(); a failed lookup
# (caught by try()) means the package is not installed.
.installed <- function(pkg) {
  lookup <- try(utils::installed.packages()[pkg, ], silent = TRUE)
  !is(lookup, "try-error")
}

# Build the "constructor" for an abstract class named `prefix`.
#
# The returned function accepts (and ignores) any arguments. When called it
# emits messages only: a notice that the class is abstract, then, for every
# attached package whose search-path entry starts with 'package:stream', the
# objects in that package whose names start with '<prefix>_', and finally a
# hint about tab completion. It returns NULL invisibly.
abstract_class_generator <- function(prefix) {
  function(...) {
    message(prefix, " is an abstract class and cannot be instantiated!")

    # Implementations follow the naming scheme '<prefix>_<name>'.
    impl_pattern <- paste0('^', prefix, '_')
    attached <- sort(grep('^package:stream', search(), value = TRUE))

    for (pkg_entry in attached) {
      found <- grep(impl_pattern, ls(pkg_entry), value = TRUE)
      if (length(found) == 0) {
        found <- "*None*"
      }
      message("\nAvailable subclasses in ", sQuote(pkg_entry), " are:\n\t",
        paste(found, collapse = ",\n\t"))
    }

    message("\nTo get more information in R Studio, type ",
      sQuote(paste0(prefix, '_')),
      " and hit the Tab key.")

    invisible(NULL)
  }
}

143 changes: 135 additions & 8 deletions R/DSC.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# the Free Software Foundation; either version 2 of the License, or
# any later version.
#
# This program is distributed in the hope that it will be useful,
Expand All @@ -17,18 +17,84 @@
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.


### DSC - Data Stream Clusterer interface
DSC <- function(...) stop("DSC is an abstract class and cannot be instantiated!")
#' Data Stream Clusterer Base Classes
#'
#' Abstract base classes for all DSC (Data Stream Clusterer) and DSC_R classes.
#' Concrete implementations are functions starting with `DSC_` (in R Studio, use auto-completion with Tab to select one).
#'
#' The `DSC` and `DSC_R` classes cannot be instantiated (calling
#' `DSC()` or `DSC_R()` produces only a message listing the available implementations),
#' but they serve as a base
#' class from which other DSC classes inherit.
#'
#' Class `DSC` provides several generic functions that can operate on all
#' DSC subclasses. See Functions section below.
#' Additional, separately documented functions are:
#'
#' * [update] Add new data points from a stream to a clustering.
#' * [plot] function is also provided for `DSC`.
#' * [get_assignment] Find out what cluster new data points would be assigned to.
#'
#' `get_centers` and `get_weights` are typically overwritten by
#' subclasses of `DSC`. `DSC_R` provides these functions for R-based
#' DSC implementations.
#'
#' Since `DSC` objects often contain external pointers, regular saving and
#' reading operations will fail. Use [saveDSC] and [readDSC]
#' which will serialize the objects first appropriately.
#'
#' @aliases DSC DSC_R
#' @param x a DSC object.
#' @param type Return weights of micro- or macro-clusters in x. Auto uses the
#' class of x to decide.
#' @param scale a range (from, to) to scale the weights. Returns by default
#' the raw weights.
#' @param ... further parameters.
#' @author Michael Hahsler
#' @seealso [DSC_Micro], [DSC_Macro], [animate_cluster], [update],
#' [evaluate], [get_assignment],
#' [microToMacro],
#' [plot], [prune_clusters],
#' [recluster], [readDSC], [saveDSC]
#' @export DSC
#' @examples
#'
#' DSC()
#'
#' stream <- DSD_Gaussians(k=3, d=2)
#' dstream <- DSC_DStream(gridsize=.1)
#' update(dstream, stream, 500)
#' dstream
#'
#' # get micro-cluster centers
#' get_centers(dstream)
#'
#' # get the number of clusters
#' nclusters(dstream)
#'
#' # get the micro-cluster weights
#' get_weights(dstream)
#'
#' # D-Stream also has macro-clusters
#' get_weights(dstream, type="macro")
#'
#'
DSC<- abstract_class_generator("DSC")

### all DSC classes have these interface methods


#' @describeIn DSC Gets the cluster centers (micro- or macro-clusters) from a DSC object.
#' @export get_centers
get_centers <- function(x, type = c("auto", "micro", "macro"), ...) {
  UseMethod("get_centers")
}

# Fallback when no class-specific method exists: stop with an error that
# names the class(es) of `x`.
get_centers.default <- function(x, type = c("auto", "micro", "macro"), ...) {
  class_label <- paste(class(x), collapse = ", ")
  stop(gettextf("get_centers not implemented for class '%s'.", class_label))
}

### get MC weights. In case it is not implemented it returns 1s
#' @describeIn DSC Get the weights of the clusters in the DSC (returns 1s if not implemented by the clusterer)
#' @export get_weights
get_weights <- function(x, type = c("auto", "micro", "macro"), scale = NULL, ...) {
  UseMethod("get_weights")
}
get_weights.default <- function(x, type=c("auto", "micro", "macro"),
Expand All @@ -47,46 +113,54 @@ get_weights.default <- function(x, type=c("auto", "micro", "macro"),
#####################################################################3

### make a deep copy of the DSC object


#' @describeIn DSC Create a deep copy of a DSC object that contains reference classes (e.g., Java data structures for MOA).
#' @export get_copy
get_copy <- function(x) {
  UseMethod("get_copy")
}

# Fallback when no class-specific method exists: stop with an error that
# names the class(es) of `x`.
get_copy.default <- function(x, ...) {
  class_label <- paste(class(x), collapse = ", ")
  stop(gettextf("get_copy not implemented for class '%s'.", class_label))
}

#' @describeIn DSC Get micro-clusters if the object is a `DSC_Micro`.
get_microclusters <- function(x, ...) {
  UseMethod("get_microclusters")
}

# Base-class method: a plain DSC has no micro-clusters, so stop with an
# error that names the class(es) of `x`.
get_microclusters.DSC <- function(x, ...) {
  class_label <- paste(class(x), collapse = ", ")
  stop(gettextf("No micro-clusters available for class '%s'.", class_label))
}

#' @describeIn DSC Get macro-clusters if the object is a `DSC_Macro`.
get_macroclusters <- function(x, ...) {
  UseMethod("get_macroclusters")
}

# Base-class method: a plain DSC has no macro-clusters, so stop with an
# error that names the class(es) of `x`.
get_macroclusters.DSC <- function(x, ...) {
  class_label <- paste(class(x), collapse = ", ")
  stop(gettextf("No macro-clusters available for class '%s'.", class_label))
}

#' @describeIn DSC Get micro-cluster weights if the object is a `DSC_Micro`.
get_microweights <- function(x, ...) {
  UseMethod("get_microweights")
}

# Base-class method: a plain DSC has no micro-cluster weights, so stop with
# an error that names the class(es) of `x`.
get_microweights.DSC <- function(x, ...) {
  class_label <- paste(class(x), collapse = ", ")
  stop(gettextf("No weights for micro-clusters available for class '%s'.",
    class_label))
}

#' @describeIn DSC Get macro-cluster weights if the object is a `DSC_Macro`.
get_macroweights <- function(x, ...) {
  UseMethod("get_macroweights")
}

# Base-class method: a plain DSC has no macro-cluster weights, so stop with
# an error that names the class(es) of `x`.
get_macroweights.DSC <- function(x, ...) {
  class_label <- paste(class(x), collapse = ", ")
  stop(gettextf("No weights for macro-clusters available for class '%s'.",
    class_label))
}


### derived functions, plot and print
#' @describeIn DSC Returns the number of clusters of the requested `type` (the number of rows returned by `get_centers()`).
#' @export nclusters
nclusters <- function(x, type = c("auto", "micro", "macro"), ...) {
  UseMethod("nclusters")
}

# DSC method: count the rows of the centers returned for the requested type.
nclusters.DSC <- function(x, type = c("auto", "micro", "macro"), ...) {
  centers <- get_centers(x, type = type, ...)
  nrow(centers)
}


print.DSC <- function(x, ...) {
cat(.line_break(paste(x$description)))
cat("Class:", paste(class(x), collapse=", "), "\n")
Expand All @@ -98,7 +172,59 @@ print.DSC <- function(x, ...) {

# The summary of a DSC object is simply its printed representation.
summary.DSC <- function(object, ...) {
  print(object)
}

#plot.DSC will call super question.
#' Plotting Data Stream Data and Clusterings
#'
#' Methods to plot data stream data and clusterings.
#'
#'
#' @aliases plot plot.DSD plot.DSC
#' @param x the DSD or DSC object to be plotted.
#' @param dsd a DSD object to plot the data in the background.
#' @param n number of points taken from the dsd to plot.
#' @param col,col_points,col_clusters colors used for plotting.
#' @param weights the size of the symbols for micro- and macro-clusters
#' represents its weight.
#' @param scale range for the symbol sizes used.
#' @param cex size factor for symbols.
#' @param pch symbol type.
#' @param method method used for plotting: "pairs" (pairs plot), "scatter"
#' (scatter plot) or "pc" (plot first 2 principal components).
#' @param dim an integer vector with the dimensions to plot. If \code{NULL}
#' then for methods \code{"pairs"} and \code{"pc"} all dimensions are used and
#' for \code{"scatter"} the first two dimensions are plotted.
#' @param alpha alpha shading used to plot the points.
#' @param type Plot micro clusters (\code{type="micro"}), macro clusters
#' (\code{type="macro"}), both micro and macro clusters (\code{type="both"}),
#' outliers (\code{type="outliers"}), or everything together
#' (\code{type="all"}). \code{type="auto"} leaves it to the class of \code{x} to
#' decide.
#' @param assignment logical; show assignment area of micro-clusters.
#' @param \dots further arguments are passed on to plot or pairs in
#' \pkg{graphics}.
#' @author Michael Hahsler
#' @seealso \code{\link{DSC}}, \code{\link{DSD}}
#' @examples
#'
#' stream <- DSD_Gaussians(k=3, d=3)
#'
#' ## plot data
#' plot(stream, n=500)
#' plot(stream, method="pc", n=500)
#' plot(stream, method="scatter", dim=c(1,3), n=500)
#'
#' ## create and plot micro-clusters
#' dstream <- DSC_DStream(gridsize=0.1)
#' update(dstream, stream, 500)
#' plot(dstream)
#'
#' ## plot with data, projected on the first two principal components
#' ## and dimensions 2 and 3
#' plot(dstream, stream)
#' plot(dstream, stream, method="pc")
#' plot(dstream, stream, dim=c(2,3))
#'
#' ## plot micro and macro-clusters
#' plot(dstream, stream, type="both")
plot.DSC <- function(x, dsd = NULL, n = 500,
col_points=NULL,
col_clusters=c("red", "blue", "green"),
Expand All @@ -112,7 +238,7 @@ plot.DSC <- function(x, dsd = NULL, n = 500,
...) {

type <- match.arg(type)
if(type == "outliers" && !is(x, "DSCOutlier"))
if(type == "outliers" && !is(x, "DSOutlier"))
stop("The clusterer is not an outlier detector, cannot draw outliers")

if(is.null(col_points)) col_points <- .points_col
Expand Down Expand Up @@ -226,3 +352,4 @@ plot.DSC <- function(x, dsd = NULL, n = 500,
}

}

Loading

0 comments on commit fb73fee

Please sign in to comment.