From 8e85b8def0d511c0c0f56d5713663d079609ad22 Mon Sep 17 00:00:00 2001
From: carvalho28
Date: Sun, 9 Mar 2025 20:08:59 +0100
Subject: [PATCH] fix(squad): use StringSchema instead of MultipleChoiceSchemaLower

Switch to StringSchema in the SQuAD benchmark to correctly process extractive answers.
---
 deepeval/benchmarks/squad/squad.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/deepeval/benchmarks/squad/squad.py b/deepeval/benchmarks/squad/squad.py
index 1b4c066a7..1807c5aae 100644
--- a/deepeval/benchmarks/squad/squad.py
+++ b/deepeval/benchmarks/squad/squad.py
@@ -8,7 +8,7 @@
 from deepeval.models import DeepEvalBaseLLM
 from deepeval.benchmarks.squad.task import SQuADTask
 from deepeval.benchmarks.squad.template import SQuADTemplate
-from deepeval.benchmarks.schema import MultipleChoiceSchemaLower
+from deepeval.benchmarks.schema import StringSchema
 from deepeval.telemetry import capture_benchmark_run
 from deepeval.metrics.utils import initialize_model
 
@@ -132,8 +132,8 @@ def predict(self, model: DeepEvalBaseLLM, golden: Golden) -> Dict:
 
         # Enforced model generation
         try:
-            res: MultipleChoiceSchemaLower = model.generate(
-                prompt=prompt, schema=MultipleChoiceSchemaLower
+            res: StringSchema = model.generate(
+                prompt=prompt, schema=StringSchema
             )
             prediction = res.answer
         except TypeError: