Skip to content

Commit

Permalink
Merge pull request #11 from NorskRegnesentral/new_detector_base_class
Browse files Browse the repository at this point in the history
[ENH] New detector base class
  • Loading branch information
Tveten authored Aug 23, 2024
2 parents 74bdd0f + 8777d95 commit 6cd6796
Show file tree
Hide file tree
Showing 33 changed files with 1,345 additions and 813 deletions.
4 changes: 2 additions & 2 deletions NOTES.md
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ using the same example data as for anomaly detection.

### Changepoints in univariate data or multivariate data without subset changes
```python
detector = ChangepointDetector().fit(x_univariate)
detector = ChangeDetector().fit(x_univariate)
detector.predict(x_univariate)
0 0
1 1
Expand All @@ -152,7 +152,7 @@ dtype: int64
```
### Subset changepoints in multivariate data
```python
detector = SubsetChangepointDetector().fit(x_multivariate)
detector = SubsetChangeDetector().fit(x_multivariate)
detector.predict(x_multivariate)
index columns
0 0 [0]
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ from skchange.change_detectors.moscore import Moscore
from skchange.datasets.generate import generate_teeth_data

df = generate_teeth_data(n_segments=10, segment_length=50, mean=5, random_state=1)
detector = Moscore(bandwidth=10, fmt="sparse")
detector = Moscore(bandwidth=10)
detector.fit_predict(df)
>>>
0 49
Expand Down Expand Up @@ -51,7 +51,7 @@ df = generate_teeth_data(
affected_proportion=0.2,
random_state=2,
)
detector = Mvcapa(collective_penalty="sparse", fmt="sparse")
detector = Mvcapa(collective_penalty="sparse")
detector.fit_predict(df)
>>>
start end components
Expand Down
11 changes: 7 additions & 4 deletions interactive/benchmark.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,23 @@
"""Benchmarking the computational efficiency of the detectors."""

from timeit import timeit

import numpy as np
import pandas as pd
import plotly.express as px

from skchange.anomaly_detectors.tests.test_anomaly_detectors import anomaly_detectors
from skchange.change_detectors.tests.test_change_detectors import change_detectors
from skchange.anomaly_detectors import ANOMALY_DETECTORS
from skchange.change_detectors import CHANGE_DETECTORS

# TODO: Add all the different scores and costs.
detector_classes = anomaly_detectors + change_detectors
# TODO: Make sure hyperparameters are set such that comparisons are fair.
detector_classes = ANOMALY_DETECTORS + CHANGE_DETECTORS
ns = [1000, 10000, 100000, 1000000]
n_runs = [100, 10, 1, 1]
timings = {}
for detector_class in detector_classes:
detector_name = detector_class.__name__
detector = detector_class.create_test_instance().set_params(fmt="sparse")
detector = detector_class.create_test_instance()
setup_data = pd.DataFrame(np.random.normal(0, 1, size=1000))
detector.fit_predict(setup_data) # Compile numba
timings[detector_name] = []
Expand Down
51 changes: 34 additions & 17 deletions interactive/explore_capa.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
"""Interactive exploration of the Capa and Mvcapa anomaly detectors."""

import pandas as pd
import plotly.express as px

from skchange.anomaly_detectors.capa import Capa
Expand All @@ -6,30 +9,44 @@
from skchange.utils.benchmarking.profiler import Profiler

# Univariate
df = generate_teeth_data(n_segments=5, mean=10, segment_length=10, p=1, random_state=2)
capa = Capa(fmt="sparse", max_segment_length=20)
anomalies = capa.fit_predict(df)
df = generate_teeth_data(n_segments=5, segment_length=10, mean=10, random_state=2)[0]
detector = Capa(max_segment_length=20)

anomalies = detector.fit_predict(df)
print(anomalies)

capa = Capa(labels="score", fmt="dense", max_segment_length=20)
scores = capa.fit_predict(df)
anomaly_labels = detector.fit_transform(df)
px.scatter(x=df.index, y=df, color=anomaly_labels.astype(str))

capa = Capa(labels="indicator", fmt="dense", max_segment_length=20)
anomalies = capa.fit_predict(df)
px.scatter(x=df.index, y=df.values[:, 0], color=anomalies)
scores = detector.score_transform(df)
px.scatter(scores)

# Multivariate
# TODO: Add plotting functionality to assess the affected subset.
df = generate_teeth_data(5, 10, p=10, mean=10, affected_proportion=0.2, random_state=2)
capa = Mvcapa(collective_penalty="sparse", fmt="sparse")
anomalies = capa.fit_predict(df)
detector = Mvcapa(collective_penalty="sparse")

anomalies = detector.fit_predict(df)
print(anomalies)

anomaly_labels = detector.fit_transform(df)
anomaly_labels = (anomaly_labels > 0).astype(int)
anomaly_labels[anomaly_labels == 0] = 0.1
plot_df = pd.concat(
[
df.melt(ignore_index=False).reset_index(),
anomaly_labels.melt(value_name="anomaly_label")["anomaly_label"],
],
axis=1,
)
plot_df["variable"] = plot_df["variable"].astype(str)
px.scatter(plot_df, x="index", y="value", color="variable", size="anomaly_label")

capa = Mvcapa(labels="score", fmt="dense", max_segment_length=20)
scores = capa.fit_predict(df)
fig = px.line(df)
fig.add_scatter(anomaly_labels)
px.line(anomaly_labels)

capa = Mvcapa(collective_penalty_scale=5, labels="indicator", fmt="dense")
anomalies = capa.fit_predict(df)
df.plot(kind="line", backend="plotly")
anomalies.plot(kind="line", backend="plotly")
scores = detector.score_transform(df)
px.scatter(scores)


# Profiling
Expand Down
6 changes: 3 additions & 3 deletions interactive/explore_circular_binseg.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
"""Interactive exploration of the Circular Binary Segmentation anomaly detector."""

import plotly.express as px

from skchange.anomaly_detectors.circular_binseg import (
Expand All @@ -12,9 +14,7 @@
score="mean", growth_factor=1.5, min_segment_length=10
)
anomalies = detector.fit_predict(df)

df.plot(kind="line", backend="plotly")

px.line(df)
px.scatter(detector.scores, x="argmax_anomaly_start", y="score")

# Test anomaly intervals
Expand Down
46 changes: 22 additions & 24 deletions interactive/explore_moscore.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,20 @@
"""Interactive exploration of the Moscore change detector."""

import numpy as np
import plotly.express as px
from numba import njit

from skchange.change_detectors.moscore import Moscore, where
from skchange.change_detectors.moscore import Moscore
from skchange.datasets.generate import add_linspace_outliers, generate_teeth_data
from skchange.scores.mean_score import init_mean_score, mean_score
from skchange.utils.benchmarking.profiler import Profiler

# Simple univariate example
df = generate_teeth_data(n_segments=2, mean=10, segment_length=100, p=1, random_state=2)
detector = Moscore()
changepoints = detector.fit_predict(df)
px.scatter(detector.scores)
labels = detector.transform(df)
scores = detector.score_transform(df)
px.scatter(scores)


# Profiling
Expand All @@ -24,13 +27,6 @@
profiler.stop()


# Various unit tests
df = generate_teeth_data(n_segments=1, mean=10, segment_length=10, p=1)
precomputed_params = init_mean_score(df.values)
mean_score(precomputed_params, start=0, end=9, split=4)
where(np.array([True, True, True, False, False]))


# Variance score
df = generate_teeth_data(
n_segments=2, variance=16, segment_length=100, p=1, random_state=1
Expand All @@ -44,6 +40,7 @@
# Custom score
@njit
def col_median(X: np.ndarray) -> np.ndarray:
"""Compute the median of each column of X."""
m = X.shape[1]
medians = np.zeros(m)
for j in range(m):
Expand All @@ -53,27 +50,28 @@ def col_median(X: np.ndarray) -> np.ndarray:

@njit
def init_spike_score(X: np.ndarray) -> np.ndarray:
"""Initialize the spike score by returning the input data X unchanged."""
# No precomputation is performed here; X itself is passed through as-is.
return X


def spike_score_factory(margin: int = 0):
@njit
def spike_score(
precomputed_params: np.ndarray, start: int, end: int, split: int
) -> float:
X = precomputed_params
interval_X = np.concatenate(
(X[start : split - margin], X[split + margin + 1 : end + 1])
)
baseline_median = col_median(interval_X)
return np.sum(np.abs(X[split] - baseline_median))

return spike_score
@njit
def spike_score(
    precomputed_params: np.ndarray,
    start: np.ndarray,
    end: np.ndarray,
    split: np.ndarray,
) -> np.ndarray:
    """Calculate the spike score at each split point.

    For every interval ``start[i]`` to ``end[i]`` (end inclusive), the
    column-wise median of the rows in the interval is used as a baseline. The
    score is the sum over columns of the absolute deviation of the row at
    ``split[i]`` from that baseline.

    Parameters
    ----------
    precomputed_params : np.ndarray
        2D array indexed as (row, column). Presumably the output of
        ``init_spike_score``, i.e. the raw data -- confirm against the caller.
    start : np.ndarray
        Start indices of the intervals.
    end : np.ndarray
        End indices of the intervals (inclusive).
    split : np.ndarray
        Row indices at which the spike score is evaluated. Assumed to have
        the same length as ``start`` and ``end`` -- confirm against the caller.

    Returns
    -------
    np.ndarray
        One score per interval (the column-wise sum is taken along axis 1).
    """
    X = precomputed_params
    baseline_median = np.zeros((len(start), X.shape[1]))
    # The baseline is taken over the whole slice X[s : e + 1]; no rows are
    # excluded around the split index.
    for i, (s, e) in enumerate(zip(start, end)):
        baseline_median[i] = col_median(X[s : e + 1])
    return np.sum(np.abs(X[split] - baseline_median), axis=1)


df = generate_teeth_data(n_segments=1, mean=0, segment_length=100, p=1)
df = add_linspace_outliers(df, n_outliers=4, outlier_size=10)
score = (spike_score_factory(margin=0), init_spike_score)
score = (spike_score, init_spike_score)
detector = Moscore(score, bandwidth=5)
anomalies = detector.fit_predict(df)
px.scatter(detector.scores)
Expand Down
14 changes: 3 additions & 11 deletions interactive/explore_moscore_anomaly.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
"""Interactive exploration of the MoscoreAnomaly detector."""

import numpy as np
import plotly.express as px

Expand All @@ -20,17 +22,7 @@
left_bandwidth=50,
)
anomalies = detector.fit_predict(df)

detector = MoscoreAnomaly(
score="mean",
min_anomaly_length=10,
max_anomaly_length=1000,
left_bandwidth=20,
labels="score",
)
scores = detector.fit_predict(df)
scores["length"] = scores["anomaly_end"] - scores["anomaly_start"] + 1
px.scatter(scores, x="anomaly_start", y="score", color="length")
print(anomalies)


# Profiling
Expand Down
2 changes: 2 additions & 0 deletions interactive/explore_pelt.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
"""Interactive exploration of the Pelt change detector."""

import numpy as np

from skchange.change_detectors.pelt import Pelt
Expand Down
7 changes: 4 additions & 3 deletions interactive/explore_seeded_binseg.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
"""Interactive exploration of Seeded Binary Segmentation."""

import plotly.express as px

from skchange.change_detectors.seeded_binseg import SeededBinarySegmentation
Expand All @@ -8,9 +10,8 @@
detector = SeededBinarySegmentation(score="mean", growth_factor=2)
detector.fit_predict(df)

df.plot(kind="line", backend="plotly")

px.scatter(detector.scores, x="maximizer", y="score", hover_data=["start", "end"])
px.line(df)
px.scatter(detector.scores, x="argmax_cpt", y="score", hover_data=["start", "end"])


# Profiling
Expand Down
3 changes: 3 additions & 0 deletions interactive/explore_stat_threshold_anomaliser.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
"""Interactive exploration of the StatThresholdAnomaliser."""

import numpy as np

from skchange.anomaly_detectors.anomalisers import StatThresholdAnomaliser
Expand All @@ -16,3 +18,4 @@
change_detector, stat=np.mean, stat_lower=-1.0, stat_upper=1.0
)
anomalies = detector.fit_predict(df)
print(anomalies)
6 changes: 3 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,10 @@ classifiers = [
]
requires-python = ">=3.9,<3.13"
dependencies = [
"numpy<1.27,>=1.21", # required for framework layer and base class logic
"pandas<2.2.0,>=1.3", # pandas is the main in-memory data container
"numpy>=1.21",
"pandas>=1.1",
"numba>=0.56", # numba is used for fast computation throughout
"sktime>=0.23.0,<0.30.0",
"sktime>=0.30",
]

[project.urls]
Expand Down
23 changes: 23 additions & 0 deletions skchange/anomaly_detectors/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,24 @@
"""Anomaly detection algorithms."""

from skchange.anomaly_detectors.anomalisers import StatThresholdAnomaliser
from skchange.anomaly_detectors.base import (
CollectiveAnomalyDetector,
PointAnomalyDetector,
)
from skchange.anomaly_detectors.capa import Capa
from skchange.anomaly_detectors.circular_binseg import CircularBinarySegmentation
from skchange.anomaly_detectors.moscore_anomaly import MoscoreAnomaly
from skchange.anomaly_detectors.mvcapa import Mvcapa

BASE_ANOMALY_DETECTORS = [CollectiveAnomalyDetector, PointAnomalyDetector]
COLLECTIVE_ANOMALY_DETECTORS = [
Capa,
CircularBinarySegmentation,
MoscoreAnomaly,
Mvcapa,
StatThresholdAnomaliser,
]
POINT_ANOMALY_DETECTORS = []
ANOMALY_DETECTORS = COLLECTIVE_ANOMALY_DETECTORS + POINT_ANOMALY_DETECTORS

__all__ = BASE_ANOMALY_DETECTORS + ANOMALY_DETECTORS
Loading

0 comments on commit 6cd6796

Please sign in to comment.