diff --git a/deepeval/metrics/conversation_completeness/conversation_completeness.py b/deepeval/metrics/conversation_completeness/conversation_completeness.py
index c8ff14936..7e102303a 100644
--- a/deepeval/metrics/conversation_completeness/conversation_completeness.py
+++ b/deepeval/metrics/conversation_completeness/conversation_completeness.py
@@ -148,6 +148,9 @@ async def _a_generate_reason(self) -> str:
         return data["reason"]
 
     def _generate_reason(self) -> str:
+        if self.include_reason is False:
+            return None
+
         incompletenesses: List[str] = []
         for verdict in self.verdicts:
             if verdict.verdict.strip().lower() == "no":
diff --git a/deepeval/metrics/conversation_relevancy/conversation_relevancy.py b/deepeval/metrics/conversation_relevancy/conversation_relevancy.py
index 48340b66c..e2b3db449 100644
--- a/deepeval/metrics/conversation_relevancy/conversation_relevancy.py
+++ b/deepeval/metrics/conversation_relevancy/conversation_relevancy.py
@@ -134,6 +134,9 @@ async def a_measure(
         return self.score
 
     async def _a_generate_reason(self) -> str:
+        if self.include_reason is False:
+            return None
+
         irrelevancies: List[Dict[str, str]] = []
         for index, verdict in enumerate(self.verdicts):
             if verdict.verdict.strip().lower() == "no":
diff --git a/deepeval/metrics/role_adherence/role_adherence.py b/deepeval/metrics/role_adherence/role_adherence.py
index 8a461cc7a..6ccf8b37c 100644
--- a/deepeval/metrics/role_adherence/role_adherence.py
+++ b/deepeval/metrics/role_adherence/role_adherence.py
@@ -122,6 +122,9 @@ async def a_measure(
         return self.score
 
     async def _a_generate_reason(self, role: str) -> str:
+        if self.include_reason is False:
+            return None
+
         prompt = RoleAdherenceTemplate.generate_reason(
             score=self.score,
             role=role,
diff --git a/y.py b/y.py
new file mode 100644
index 000000000..56506bc28
--- /dev/null
+++ b/y.py
@@ -0,0 +1,6 @@
+from deepeval.dataset import EvaluationDataset
+
+dataset = EvaluationDataset()
+dataset.pull(alias="Legal Documents Dataset")
+
+print(dataset)
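The three metric files above all gain the same guard: when a metric was constructed with include_reason=False, the reason-generation helpers now return None immediately instead of building a reason prompt. The snippet below is a minimal, self-contained sketch of that early-return pattern; it does not use deepeval's real metric classes, and the _MetricSketch name and its fields are illustrative only.

from typing import List, Optional


class _MetricSketch:
    def __init__(self, include_reason: bool = True):
        # Mirrors the attribute the diff checks; on the real metrics this is
        # set from the metric constructor's include_reason argument.
        self.include_reason = include_reason
        self.verdicts: List[str] = ["yes", "no", "yes"]

    def _generate_reason(self) -> Optional[str]:
        if self.include_reason is False:
            # Same early return as the diff: skip building the reason prompt
            # (and the model call behind it) entirely.
            return None
        # ... in the real metrics, a reason prompt is built from self.verdicts
        # and sent to the evaluation model here ...
        return f"reason derived from {len(self.verdicts)} verdicts"


print(_MetricSketch(include_reason=False)._generate_reason())  # None
print(_MetricSketch(include_reason=True)._generate_reason())   # "reason derived from 3 verdicts"

Short-circuiting before the prompt is assembled means a metric with include_reason=False never spends a model call on a reason it will not report.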