Bring-1535-to-point-release #1560

Closed
wants to merge 53 commits
53 commits
955473d
divide task and statistical metrics
davidslater May 7, 2022
43dc486
update refs
davidslater May 7, 2022
7d89905
load
davidslater May 7, 2022
88517ba
fix existing tests
davidslater May 8, 2022
6ae58ce
register metrics
davidslater May 8, 2022
3d2e04b
supported metrics references
davidslater May 8, 2022
ffbd029
move armory.utils.metrics to armory.metrics
davidslater May 9, 2022
23a3442
add loading
davidslater May 9, 2022
439c246
remove empty file
davidslater May 9, 2022
d474a1c
finish supported metrics
davidslater May 9, 2022
24a53ab
migrate poisoning fairness metrics to new module
davidslater May 9, 2022
97c5d33
moved Meter construction to scenario
davidslater May 10, 2022
04824ff
stub out metric tests
davidslater May 13, 2022
9a42d29
Merge branch 'develop' into task-metrics
davidslater May 13, 2022
d806aac
remove old metrics
davidslater May 13, 2022
6c89710
metric WIP
davidslater May 16, 2022
a2eb91f
poisoning metric refactor - work in progress
davidslater May 18, 2022
53445f6
update metrics refs
davidslater May 18, 2022
c76941f
update docs
davidslater May 18, 2022
f862925
update test stubs
davidslater May 18, 2022
07ddb06
refactor fairness metrics, untested
swsuggs May 18, 2022
ac3278c
update majority_mask
davidslater May 18, 2022
efefce7
minor update
davidslater May 18, 2022
f3e7678
finish task metric tests
davidslater May 20, 2022
ee4b75a
update docs
davidslater May 20, 2022
e5723d0
move to different namespace
davidslater May 20, 2022
a1f2f3a
update docs
davidslater May 20, 2022
2f77266
move subsection
davidslater May 20, 2022
47c3ca5
Merge branch 'develop' into task-metrics
davidslater May 20, 2022
53fa5ec
reference poisoning docs
davidslater May 20, 2022
5f75469
small bug fixes for statistical.py
swsuggs May 21, 2022
4e9e158
filled out tests in for statistical_metrics
swsuggs May 21, 2022
aca9cd3
numpy subdtype
davidslater May 23, 2022
a63bb6c
Merge pull request #2 from swsuggs/task-metrics-unit-tests
davidslater May 23, 2022
682308d
Merge pull request #1 from swsuggs/task-metrics-refactor
davidslater May 23, 2022
6324aee
add tag to test
davidslater May 23, 2022
511d763
poisoning metrics tests; will update after further poisoning refactors
swsuggs May 23, 2022
7eef6cc
potential update
davidslater May 23, 2022
89d0f3f
unneeded line
davidslater May 23, 2022
b836aec
Merge branch 'task-metrics' of https://github.com/davidslater/armory …
swsuggs May 24, 2022
b1437d8
remove tests for filter and model bias, obviated by poisoning updates
swsuggs May 24, 2022
9487977
Merge pull request #3 from swsuggs/task-metrics-unit-tests
davidslater May 24, 2022
3dbe39a
lint
davidslater May 24, 2022
04a239a
scenario updates
davidslater May 24, 2022
e84f3b2
remove explanatory stage
davidslater May 25, 2022
40c537e
remove comments
davidslater May 25, 2022
e62a24b
pip install armory for build
davidslater May 25, 2022
551fe18
update dockerfile build
davidslater May 26, 2022
6db9a79
merge fix
davidslater May 26, 2022
1efbf78
fix merge
davidslater May 26, 2022
765ad2c
Merge pull request #1495 from davidslater/task-metrics
davidslater May 26, 2022
b1a923a
Merge pull request #1535 from davidslater/release-fix
lcadalzo May 27, 2022
84325d6
Merge commit 'b1a923aa996494901cf29e89c18d99dd26b557da' into bring-15…
mwartell Jun 8, 2022
4 changes: 2 additions & 2 deletions armory/art_experimental/attacks/sweep.py

@@ -200,14 +200,14 @@ def _load_metric_fn(self, metric_dict):
         metric_module_name = metric_dict.get("module")
         if metric_module_name is None:
             # by default use categorical accuracy to measure attack success
-            from armory.utils.metrics import categorical_accuracy
+            from armory import metrics

             log.info(
                 "Using default categorical accuracy to measure attack success "
                 "since attack_config['sweep_params']['metric']['module'] is "
                 "unspecified."
             )
-            self.metric_fn = categorical_accuracy
+            self.metric_fn = metrics.get("categorical_accuracy")
             self.metric_threshold = (
                 0.5  # for binary metric, any x s.t. 0 < x < 1 suffices
             )
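
The new call resolves the same function through the metrics registry instead of a hard-coded import. A minimal sketch of the equivalence (assumes an environment with armory installed; the inputs and output are illustrative):

from armory import metrics

# metrics.get returns the registered batchwise metric for a supported name
fn = metrics.get("categorical_accuracy")
print(fn([1, 2], [1, 3]))  # per-element correctness, e.g. [1, 0]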
@@ -55,7 +55,7 @@ def forward(self, x: Tensor) -> Tensor:


 class get_model(nn.Module):
-    def __init__(self, weights_path: Optional[str], model_kwargs: dict):
+    def __init__(self, weights_path: Optional[str], **model_kwargs):
         super().__init__()
         self.inner_model = Micronnet(**model_kwargs)
         self.inner_model.to(DEVICE)
33 changes: 17 additions & 16 deletions armory/instrument/config.py

@@ -11,7 +11,8 @@
     get_hub,
 )
 from armory.logs import log
-from armory.utils import metrics
+
+from armory import metrics


 class MetricsLogger:

@@ -131,7 +132,7 @@ def construct_meters_for_perturbation_metrics(

     hub = get_hub()
     for name in names:
-        metric = metrics.get_supported_metric(name)
+        metric = metrics.get(name)
         hub.connect_meter(
             Meter(
                 f"perturbation_{name}",

@@ -192,12 +193,12 @@ def _write(self, name, batch, result):
         # E.g., if someone renames this from "benign_word_error_rate" to "benign_wer"
         if "word_error_rate" in name:
             if "total_word_error_rate" not in name:
-                result = metrics.get_supported_metric("total_wer")(result)
+                result = metrics.get("total_wer")(result)
                 total, (num, denom) = result
                 f_result = f"total={total:.2%}, {num}/{denom}"
         elif "entailment" in name:
             if "total_entailment" not in name:
-                result = metrics.get_supported_metric("total_entailment")(result)
+                result = metrics.get("total_entailment")(result)
                 total = sum(result.values())
                 f_result = (
                     f"contradiction: {result['contradiction']}/{total}, "

@@ -208,8 +209,8 @@ def _write(self, name, batch, result):
             if "input_to" in name:
                 for m in MEAN_AP_METRICS:
                     if m in name:
-                        metric = metrics.get_supported_metric(m)
-                        result = metrics.MeanAP(metric)(result)
+                        metric = metrics.get(m)
+                        result = metrics.task.MeanAP(metric)(result)
                         break
             f_result = f"{result}"
         elif any(m in name for m in QUANTITY_METRICS):

@@ -235,22 +236,22 @@ def _task_metric(
     Return list of meters generated for this specific task
     """
     meters = []
-    metric = metrics.get_supported_metric(name)
+    metric = metrics.get(name)
     final_kwargs = {}
     if name in MEAN_AP_METRICS:
         final_suffix = name
-        final = metrics.MeanAP(metric)
+        final = metrics.task.MeanAP(metric)
         final_kwargs = metric_kwargs

         name = f"input_to_{name}"
-        metric = metrics.get_supported_metric("identity_unzip")
+        metric = metrics.get("identity_unzip")
         metric_kwargs = None
         record_final_only = True
     elif name == "entailment":
-        final = metrics.get_supported_metric("total_entailment")
+        final = metrics.get("total_entailment")
         final_suffix = "total_entailment"
     elif name == "word_error_rate":
-        final = metrics.get_supported_metric("total_wer")
+        final = metrics.get("total_wer")
         final_suffix = "total_word_error_rate"
     elif use_mean:
         final = np.mean

@@ -366,22 +367,22 @@ def _task_metric_wrt_benign_predictions(
     Return the meter generated for this specific task
     Return list of meters generated for this specific task
     """
-    metric = metrics.get_supported_metric(name)
+    metric = metrics.get(name)
     final_kwargs = {}
     if name in MEAN_AP_METRICS:
         final_suffix = name
-        final = metrics.MeanAP(metric)
+        final = metrics.task.MeanAP(metric)
         final_kwargs = metric_kwargs

         name = f"input_to_{name}"
-        metric = metrics.get_supported_metric("identity_unzip")
+        metric = metrics.get("identity_unzip")
         metric_kwargs = None
         record_final_only = True
     elif name == "entailment":
-        final = metrics.get_supported_metric("total_entailment")
+        final = metrics.get("total_entailment")
         final_suffix = "total_entailment"
     elif name == "word_error_rate":
-        final = metrics.get_supported_metric("total_wer")
+        final = metrics.get("total_wer")
         final_suffix = "total_word_error_rate"
     elif use_mean:
         final = np.mean
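
The "total_wer" finalizer used in _write aggregates per-example word error rates into a single ratio. A small sketch of that formatting path, assuming each per-example result is an (edit_distance, reference_length) pair as the `total, (num, denom)` unpacking above implies (numbers are illustrative):

from armory import metrics

per_example = [(1, 5), (0, 4), (2, 6)]  # hypothetical (num, denom) pairs
total, (num, denom) = metrics.get("total_wer")(per_example)
print(f"total={total:.2%}, {num}/{denom}")  # total=20.00%, 3/15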
93 changes: 92 additions & 1 deletion armory/metrics/__init__.py

@@ -1 +1,92 @@
-from armory.metrics import compute, perturbation
+import importlib
+
+from armory.metrics import compute, perturbation, statistical, task
+
+SUPPORTED_METRICS = {}
+for namespace in (
+    perturbation.batch,
+    task.batch,
+    task.aggregate,
+    task.population,
+    statistical.registered,
+):
+    assert not any(k in namespace for k in SUPPORTED_METRICS)
+    SUPPORTED_METRICS.update(namespace)
+
+
+def _instantiate_validate(function, name, instantiate_if_class=True):
+    if isinstance(function, type) and issubclass(function, object):
+        if instantiate_if_class:
+            function = function()
+    if not callable(function):
+        raise ValueError(f"function {name} is not callable")
+    return function
+
+
+def supported(name):
+    """
+    Return whether given name is a supported metric
+    """
+    return name in SUPPORTED_METRICS
+
+
+def get_supported_metric(name, instantiate_if_class=True):
+    try:
+        function = SUPPORTED_METRICS[name]
+    except KeyError:
+        raise KeyError(f"{name} is not part of armory.metrics")
+    return _instantiate_validate(
+        function, name, instantiate_if_class=instantiate_if_class
+    )
+
+
+def load(string, instantiate_if_class=True):
+    """
+    Import and load a function from the given '.'-separated identifier string
+    """
+    tokens = string.split(".")
+    if not all(token.isidentifier() for token in tokens):
+        raise ValueError(f"{string} is not a valid '.'-separated set of identifiers")
+    if len(tokens) < 2:
+        raise ValueError(f"{string} not a valid module and function path")
+
+    errors = []
+    for i in range(len(tokens) - 1, 0, -1):
+        module_name = ".".join(tokens[:i])
+        metric_name = ".".join(tokens[i:])
+        try:
+            module = importlib.import_module(module_name)
+        except ImportError:
+            errors.append(f"ImportError: import {module_name}")
+            continue
+        try:
+            obj = module
+            for name in tokens[i:]:
+                obj = getattr(obj, name)
+            function = obj
+            break
+        except AttributeError:
+            errors.append(
+                f"AttributeError: module {module_name} has no attribute {metric_name}"
+            )
+    else:
+        error_string = "\n    ".join([""] + errors)
+        raise ValueError(
+            f"Could not import metric {string}. "
+            f"The following errors occurred: {error_string}"
+        )
+
+    return _instantiate_validate(
+        function, string, instantiate_if_class=instantiate_if_class
+    )
+
+
+def get(name, instantiate_if_class=True):
+    """
+    Get the given metric, first by looking for it in armory, then via import
+    instantiate_if_class - if a class is returned, instantiate it when True
+    """
+    try:
+        return get_supported_metric(name, instantiate_if_class=instantiate_if_class)
+    except KeyError:
+        return load(name, instantiate_if_class=instantiate_if_class)
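
Taken together, supported, get_supported_metric, and load give get a two-tier lookup: the SUPPORTED_METRICS registry first, then a dotted import path. A brief usage sketch (assumes armory and numpy are installed):

from armory import metrics

assert metrics.supported("categorical_accuracy")
acc = metrics.get("categorical_accuracy")  # resolved from SUPPORTED_METRICS

# An unregistered name falls through to load(), which walks the dotted path:
norm = metrics.get("numpy.linalg.norm")
print(norm([3.0, 4.0]))  # 5.0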
94 changes: 94 additions & 0 deletions armory/metrics/common.py

@@ -0,0 +1,94 @@
+"""
+Supporting tools for metrics
+"""
+
+import functools
+
+import numpy as np
+
+from armory.logs import log
+
+
+class MetricNameSpace:
+    """
+    Used to keep track of metrics and make them easily discoverable and enumerable
+    """
+
+    def __setattr__(self, name, function):
+        if name.startswith("_"):
+            raise ValueError(f"Function name '{name}' cannot start with '_'")
+        if hasattr(self, name):
+            raise ValueError(f"Cannot overwrite existing function {name}")
+        if not callable(function):
+            raise ValueError(f"{name} function {function} is not callable")
+        super().__setattr__(name, function)
+
+    def __delattr__(self, name):
+        raise ValueError("Deletion not allowed")
+
+    def _names(self):
+        return sorted(x for x in self.__dict__ if not x.startswith("_"))
+
+    def __contains__(self, name):
+        return name in self._names()
+
+    def __repr__(self):
+        """
+        Show the existing non-underscore names
+        """
+        return str(self._names())
+
+    def __iter__(self):
+        for name in self._names():
+            yield name, self[name]
+
+    def __getitem__(self, name):
+        if not hasattr(self, name):
+            raise KeyError(name)
+        return getattr(self, name)
+
+    def __setitem__(self, name, function):
+        setattr(self, name, function)
+
+
+def set_namespace(namespace, metric, name=None):
+    """
+    Set the namespace, getting the metric name if none given, and return the metric
+    """
+    if name is None:
+        name = metric.__name__
+    setattr(namespace, name, metric)
+    return metric
+
+
+def as_batch(element_metric):
+    """
+    Return a batchwise metric function from an elementwise metric function
+    """
+
+    @functools.wraps(element_metric)
+    def wrapper(x_batch, x_adv_batch, **kwargs):
+        x_batch = list(x_batch)
+        x_adv_batch = list(x_adv_batch)
+        if len(x_batch) != len(x_adv_batch):
+            raise ValueError(
+                f"len(a_batch) {len(x_batch)} != len(b_batch) {len(x_adv_batch)}"
+            )
+        y = []
+        for x, x_adv in zip(x_batch, x_adv_batch):
+            y.append(element_metric(x, x_adv, **kwargs))
+        try:
+            y = np.array(y)
+        except ValueError:
+            # Handle ragged arrays
+            y = np.array(y, dtype=object)
+        return y
+
+    if wrapper.__doc__ is None:
+        log.warning(f"{element_metric.__name__} has no doc string")
+        wrapper.__doc__ = ""
+    wrapper.__doc__ = "Batch version of:\n" + wrapper.__doc__
+    wrapper.__name__ = "batch_" + wrapper.__name__
+    # note: repr(wrapper) defaults to the element_metric, not __name__
+    # See: https://stackoverflow.com/questions/10875442/possible-to-change-a-functions-repr-in-python
+    return wrapper
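
A short end-to-end sketch of these helpers working together (the metric and values are illustrative, not part of this PR):

import numpy as np

from armory.metrics.common import MetricNameSpace, as_batch, set_namespace

element = MetricNameSpace()

def absolute_error(x, x_adv):
    """Elementwise absolute error"""
    return abs(x - x_adv)

set_namespace(element, absolute_error)  # registered under the function's name
assert "absolute_error" in element

batched = as_batch(element["absolute_error"])  # lift elementwise -> batchwise
print(batched(np.array([1.0, 2.0]), np.array([1.5, 1.0])))  # [0.5 1. ]
print(batched.__name__)  # batch_absolute_error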