diff --git a/deepeval/_version.py b/deepeval/_version.py
index 8aec22e1a..15283f469 100644
--- a/deepeval/_version.py
+++ b/deepeval/_version.py
@@ -1 +1 @@
-__version__: str = "0.9.4"
+__version__: str = "0.9.5"
diff --git a/deepeval/dataset.py b/deepeval/dataset.py
index ca552e3d8..794b1b50b 100644
--- a/deepeval/dataset.py
+++ b/deepeval/dataset.py
@@ -168,6 +168,7 @@ def run_evaluation(
     max_retries: int = 3,
     min_success: int = 1,
     raise_error: bool = False,
+    metrics: List[Metric] = None,
 ):
     table = []
 
@@ -211,6 +212,8 @@ def assert_metric():
         else:
             try:
                 assert_metric()
+            except AssertionError as e:
+                print(e)
             except Exception as e:
                 print(e)
     if test_filename is None:
diff --git a/deepeval/metrics/metric.py b/deepeval/metrics/metric.py
index 941128131..da772be94 100644
--- a/deepeval/metrics/metric.py
+++ b/deepeval/metrics/metric.py
@@ -18,6 +18,8 @@ class Metric:
+    _success: bool = False
+
     @abstractmethod
     def measure(self, output, expected_output, query: Optional[str] = None):
         pass
@@ -31,6 +33,14 @@ def _get_init_values(self):
         }
         return init_values
 
+    @property
+    def success(self):
+        return self._success
+
+    @success.setter
+    def success(self, value):
+        self._success = value
+
     @abstractmethod
     def is_successful(self) -> bool:
         return False
diff --git a/deepeval/test_case.py b/deepeval/test_case.py
index ca315da74..127dab8a0 100644
--- a/deepeval/test_case.py
+++ b/deepeval/test_case.py
@@ -3,7 +3,7 @@
 from collections import UserList
 from .metrics.metric import Metric
 from .metrics.randomscore import RandomMetric
-from .metrics.entailment_metric import EntailmentScoreMetric
+from .metrics.factual_consistency import FactualConsistencyMetric
 
 
 class TestCase:
@@ -15,7 +15,7 @@ def __init__(
         id: str = None,
     ):
         if metrics is None:
-            self.metrics = [EntailmentScoreMetric(minimum_score=0.3)]
+            self.metrics = [FactualConsistencyMetric(minimum_score=0.3)]
         else:
             self.metrics = metrics
         self.input = input
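
For reviewers, here is a minimal sketch of how a concrete metric could drive the new `success` property once this lands. The `Metric` base below is condensed from the hunks above; `LengthRatioMetric` and its pass threshold are hypothetical, purely for illustration, and not part of this diff.

```python
from abc import abstractmethod
from typing import Optional


class Metric:
    # Mirrors the base class as it stands after this diff: pass/fail
    # state lives in _success and is exposed via the success property.
    _success: bool = False

    @abstractmethod
    def measure(self, output, expected_output, query: Optional[str] = None):
        pass

    @property
    def success(self):
        return self._success

    @success.setter
    def success(self, value):
        self._success = value

    @abstractmethod
    def is_successful(self) -> bool:
        return False


class LengthRatioMetric(Metric):
    # Hypothetical subclass (not in this diff): passes when the output
    # is at least as long as the expected output.
    def measure(self, output, expected_output, query: Optional[str] = None):
        score = len(output) / max(len(expected_output), 1)
        # Record pass/fail through the setter added in this diff.
        self.success = score >= 1.0
        return score

    def is_successful(self) -> bool:
        return self.success


metric = LengthRatioMetric()
metric.measure("a fairly long answer", "short")
assert metric.is_successful()
```

One upshot of routing state through the property rather than a bare attribute: subclasses get a single, consistent place to record outcomes, and `is_successful()` no longer has to recompute anything.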