From b7242042f893ab58556734fc4db0f9af806ccc0c Mon Sep 17 00:00:00 2001
From: gowitheflow-1998
Date: Sun, 29 Dec 2024 19:59:00 +0000
Subject: [PATCH] add missing benchmark

---
Note (review): this patch registers the RAR-b benchmark object (17 tasks) in
mteb/benchmarks/benchmarks.py. Its line structure was restored from a copy in
which all newlines had been collapsed; the hunk content and counts are
unchanged (3 context + 33 added + 3 context lines). Leading indentation of
context and added lines was reconstructed in the target file's 4-space style
and should be confirmed against the target file before applying (or apply
with whitespace-tolerant options).

 mteb/benchmarks/benchmarks.py | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/mteb/benchmarks/benchmarks.py b/mteb/benchmarks/benchmarks.py
index b56b39a1b2..edb4326cae 100644
--- a/mteb/benchmarks/benchmarks.py
+++ b/mteb/benchmarks/benchmarks.py
@@ -438,6 +438,39 @@ def load_results(
     }""",
 )
 
+RAR_b = Benchmark(
+    name="RAR-b",
+    tasks=get_tasks(
+        tasks=[
+            "ARCChallenge",
+            "AlphaNLI",
+            "HellaSwag",
+            "WinoGrande",
+            "PIQA",
+            "SIQA",
+            "Quail",
+            "SpartQA",
+            "TempReasonL1",
+            "TempReasonL2Pure",
+            "TempReasonL2Fact",
+            "TempReasonL2Context",
+            "TempReasonL3Pure",
+            "TempReasonL3Fact",
+            "TempReasonL3Context",
+            "RARbCode",
+            "RARbMath",
+        ]
+    ),
+    description="A benchmark to evaluate reasoning capabilities of retrievers.",
+    reference="https://arxiv.org/abs/2404.06347",
+    citation="""@article{xiao2024rar,
+      title={RAR-b: Reasoning as Retrieval Benchmark},
+      author={Xiao, Chenghao and Hudson, G Thomas and Al Moubayed, Noura},
+      journal={arXiv preprint arXiv:2404.06347},
+      year={2024}
+    }""",
+)
+
 MTEB_FRA = Benchmark(
     name="MTEB(fra)",
     tasks=get_tasks(