Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Implement abstract EvaluationHarness class #5

Merged
merged 7 commits into from
May 22, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions haystack_experimental/evaluation/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
#
# SPDX-License-Identifier: Apache-2.0
from .eval_harness import EvaluationHarness, EvalRunOverrides

# Public API of the evaluation package. The name must be the dunder
# `__all__`; a single-underscore `_all_` is just an ordinary variable that
# the import system ignores.
__all__ = ["EvaluationHarness", "EvalRunOverrides"]
81 changes: 81 additions & 0 deletions haystack_experimental/evaluation/eval_harness.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import Any, Dict, Generic, Optional, Type, TypeVar

from haystack import Pipeline
from haystack.core.serialization import DeserializationCallbacks
from haystack.evaluation.eval_run_result import BaseEvaluationRunResult


@dataclass
class EvalRunOverrides:
    """
    Overrides for an evaluation run.

    Used to override the init parameters of components in either
    (or both) the evaluated and evaluation pipelines. In each mapping,
    a key is a component name and its value is a dictionary of init
    parameters to override for that component.

    :param evaluated_pipeline_overrides:
        Overrides for the evaluated pipeline.
    :param evaluation_pipeline_overrides:
        Overrides for the evaluation pipeline.
    """

    # Both default to None, i.e. no overrides for that pipeline.
    evaluated_pipeline_overrides: Optional[Dict[str, Dict[str, Any]]] = None
    evaluation_pipeline_overrides: Optional[Dict[str, Dict[str, Any]]] = None


# Type parameters of the generic `EvaluationHarness` declared below.
# Type of the `inputs` argument accepted by `EvaluationHarness.run`.
EvalRunInputT = TypeVar("EvalRunInputT")
# Return type of `EvaluationHarness.run`; bound to `BaseEvaluationRunResult`.
EvalRunOutputT = TypeVar("EvalRunOutputT", bound=BaseEvaluationRunResult)
# Type of the optional `overrides` argument accepted by `EvaluationHarness.run`.
EvalRunOverridesT = TypeVar("EvalRunOverridesT")


class EvaluationHarness(ABC, Generic[EvalRunInputT, EvalRunOverridesT, EvalRunOutputT]):
    """
    Executes a pipeline with a given set of parameters and inputs, and
    evaluates its outputs with an evaluation pipeline.
    """

    @staticmethod
    def _override_pipeline(pipeline: Pipeline, parameter_overrides: Optional[Dict[str, Any]]) -> Pipeline:
        """
        Return a pipeline whose components are built with overridden init parameters.

        :param pipeline:
            The pipeline to apply the overrides to.
        :param parameter_overrides:
            Maps a component name to a dictionary of init parameters
            to override for that component.
        :returns:
            `pipeline` itself when `parameter_overrides` is `None` or empty;
            otherwise a new pipeline obtained by serializing `pipeline` and
            loading it back with the overrides applied.
        :raises ValueError:
            If an override names a component that is not among the keys of
            `pipeline.inputs(include_components_with_connected_inputs=True)`.
        """
        # Nothing to override: hand the original pipeline back untouched.
        if not parameter_overrides:
            return pipeline

        # Bind the narrowed (non-None) mapping to a local so the closure below
        # does not need an `assert` (which is stripped under `python -O`).
        overrides_by_component: Dict[str, Any] = parameter_overrides

        # Validate before doing the serialization round trip.
        known_components = pipeline.inputs(include_components_with_connected_inputs=True)
        for component_name in overrides_by_component:
            if component_name not in known_components:
                raise ValueError(f"Cannot override non-existent component '{component_name}'")

        def component_pre_init_callback(name: str, cls: Type, init_params: Dict[str, Any]):
            # `cls` is not used here. The overrides are merged into
            # `init_params` in place.
            overrides = overrides_by_component.get(name)
            if overrides:
                init_params.update(overrides)

        callbacks = DeserializationCallbacks(component_pre_init_callback)
        return Pipeline.loads(pipeline.dumps(), callbacks=callbacks)

    @abstractmethod
    def run(
        self, inputs: EvalRunInputT, *, overrides: Optional[EvalRunOverridesT] = None, run_name: Optional[str] = None
    ) -> EvalRunOutputT:
        """
        Launch an evaluation run.

        :param inputs:
            Inputs to the evaluated and evaluation pipelines.
        :param overrides:
            Overrides for the harness.
        :param run_name:
            A name for the evaluation run.
        :returns:
            The output of the evaluation pipeline.
        """
        ...
Loading