Skip to content

Commit

Permalink
fix: Filling missing metadata for leaderboard release (#1895)
Browse files Browse the repository at this point in the history
* Update ArxivClusteringS2S.py

* fill some metadata for retrieval

* fill in the rest of missing metadata

* fix metadata

* fix climatefever metadata

* fix: Added CQADupstack annotations

* removed annotation for non-existent task

* format

* Added financial to other financial dataset

* Moved ArguAna annotation to derivative datasets

---------

Co-authored-by: Kenneth Enevoldsen <kennethcenevoldsen@gmail.com>
  • Loading branch information
imenelydiaker and KennethEnevoldsen authored Jan 30, 2025
1 parent b7e412d commit 938e90f
Show file tree
Hide file tree
Showing 42 changed files with 242 additions and 213 deletions.
2 changes: 2 additions & 0 deletions mteb/abstasks/TaskMetadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@
"Written",
"Programming",
"Chemistry",
"Financial",
]

SAMPLE_CREATION_METHOD = Literal[
Expand Down Expand Up @@ -171,6 +172,7 @@
"gpl-3.0",
"cdla-sharing-1.0",
"mpl-2.0",
"msr-la-nc",
"multiple",
]
)
Expand Down
3 changes: 2 additions & 1 deletion mteb/models/arctic_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,8 @@
# in MTEB
"NQ": ["test"],
"NQHardNegatives": ["test"],
"HotPotQA": ["test"],
"NQ-PL": ["test"],
"HotPotQA": ["test"], # translated, not trained on
"HotPotQAHardNegatives": ["test"],
"HotPotQA-PL": ["test"], # translated from hotpotQA (not trained on)
"FEVER": ["test"],
Expand Down
1 change: 0 additions & 1 deletion mteb/models/e5_instruct.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
**E5_TRAINING_DATA,
"FEVER": ["train"],
"FEVERHardNegatives": ["train"],
"FEVER-PL": ["train"], # translation not trained on
"HotpotQA": ["train"],
"HotpotQAHardNegatives": ["train"],
"HotpotQA-PL": ["train"], # translation not trained on
Expand Down
1 change: 0 additions & 1 deletion mteb/models/e5_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,6 @@
**E5_TRAINING_DATA,
"FEVER": ["train"],
"FEVERHardNegatives": ["train"],
"FEVER-PL": ["train"], # translation not trained on
"HotpotQA": ["train"],
"HotpotQAHardNegatives": ["train"],
"HotpotQA-PL": ["train"], # translation not trained on
Expand Down
1 change: 0 additions & 1 deletion mteb/models/gritlm_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
# also uses medi2 which contains fever and hotpotqa:
"FEVER": ["train"],
"FEVERHardNegatives": ["train"],
"FEVER-PL": ["train"], # translation not trained on
"HotpotQA": ["train"],
"HotpotQAHardNegatives": ["train"],
"HotpotQA-PL": ["train"], # translation not trained on
Expand Down
1 change: 0 additions & 1 deletion mteb/models/salesforce_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ def instruction_template(
"FiQA2018-PL": ["train"],
"FEVER": ["train"],
"FEVERHardNegatives": ["train"],
"FEVER-PL": ["train"], # translation not trained on
"HotpotQA": ["train"],
"HotpotQAHardNegatives": ["train"],
"HotpotQA-PL": ["train"], # translation not trained on
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ class FinancialPhrasebankClassification(AbsTaskClassification):
eval_langs=["eng-Latn"],
main_score="accuracy",
date=("2013-11-01", "2013-11-01"),
domains=["News", "Written"],
domains=["News", "Written", "Financial"],
task_subtypes=["Sentiment/Hate speech"],
license="cc-by-nc-sa-3.0",
annotations_creators="expert-annotated",
Expand Down
2 changes: 1 addition & 1 deletion mteb/tasks/Classification/kor/KorFin.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ class KorFin(AbsTaskClassification):
"2022-01-01",
"2022-12-31",
), # Assumed date based on the citations in the paper
domains=["News", "Written"],
domains=["News", "Written", "Financial"],
task_subtypes=["Sentiment/Hate speech"],
license="cc-by-sa-4.0",
annotations_creators="expert-annotated",
Expand Down
14 changes: 7 additions & 7 deletions mteb/tasks/Clustering/eng/ArxivClusteringS2S.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,13 @@ class ArxivClusteringS2S(AbsTaskClustering):
eval_splits=["test"],
eval_langs=["eng-Latn"],
main_score="v_measure",
date=None,
domains=None,
task_subtypes=None,
license=None,
annotations_creators=None,
dialect=None,
sample_creation=None,
date=("1991-01-01", "2021-01-01"), # 1991-01-01 is the first arxiv paper
domains=["Academic", "Written"],
task_subtypes=[],
license="cc0-1.0",
annotations_creators="derived",
dialect=[],
sample_creation="found",
bibtex_citation="""@misc{arxiv_org_submitters_2024,
title={arXiv Dataset},
url={https://www.kaggle.com/dsv/7548853},
Expand Down
15 changes: 7 additions & 8 deletions mteb/tasks/Clustering/eng/RedditClustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,14 +85,13 @@ class RedditClustering(AbsTaskClustering):
eval_splits=["test"],
eval_langs=["eng-Latn"],
main_score="v_measure",
date=None,
form=None,
domains=None,
task_subtypes=None,
license=None,
annotations_creators=None,
dialect=None,
sample_creation=None,
date=("2021-01-01", "2021-04-14"),
domains=["Web", "Social", "Written"],
task_subtypes=["Thematic clustering"],
license="not specified", # derived from pushshift
annotations_creators="derived",
dialect=[],
sample_creation="found",
bibtex_citation="""@article{geigle:2021:arxiv,
author = {Gregor Geigle and
Nils Reimers and
Expand Down
15 changes: 7 additions & 8 deletions mteb/tasks/Clustering/eng/RedditClusteringP2P.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,13 @@ class RedditClusteringP2P(AbsTaskClustering):
eval_splits=["test"],
eval_langs=["eng-Latn"],
main_score="v_measure",
date=None,
form=None,
domains=None,
task_subtypes=None,
license=None,
annotations_creators=None,
dialect=None,
sample_creation=None,
date=("2021-01-01", "2021-04-14"),
domains=["Web", "Social", "Written"],
task_subtypes=["Thematic clustering"],
license="not specified", # derived from pushshift
annotations_creators="derived",
dialect=[],
sample_creation="found",
bibtex_citation="""@article{geigle:2021:arxiv,
author = {Gregor Geigle and
Nils Reimers and
Expand Down
15 changes: 7 additions & 8 deletions mteb/tasks/Clustering/eng/StackExchangeClustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,14 +87,13 @@ class StackExchangeClustering(AbsTaskClustering):
eval_splits=["test"],
eval_langs=["eng-Latn"],
main_score="v_measure",
date=None,
form=None,
domains=None,
task_subtypes=None,
license=None,
annotations_creators=None,
dialect=None,
sample_creation=None,
date=("2021-01-01", "2021-04-14"),
domains=["Web", "Written"],
task_subtypes=["Thematic clustering"],
license="not specified",
annotations_creators="derived",
dialect=[],
sample_creation="found",
bibtex_citation="""@article{geigle:2021:arxiv,
author = {Gregor Geigle and
Nils Reimers and
Expand Down
14 changes: 7 additions & 7 deletions mteb/tasks/Clustering/eng/StackExchangeClusteringP2P.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,13 +91,13 @@ class StackExchangeClusteringP2P(AbsTaskClustering):
eval_splits=["test"],
eval_langs=["eng-Latn"],
main_score="v_measure",
date=None,
domains=None,
task_subtypes=None,
license=None,
annotations_creators=None,
dialect=None,
sample_creation=None,
date=("2021-01-01", "2021-04-14"),
domains=["Web", "Written"],
task_subtypes=["Thematic clustering"],
license="not specified",
annotations_creators="derived",
dialect=[],
sample_creation="found",
bibtex_citation="""@article{geigle:2021:arxiv,
author = {Gregor Geigle and
Nils Reimers and
Expand Down
12 changes: 6 additions & 6 deletions mteb/tasks/PairClassification/eng/TwitterSemEval2015PC.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,12 @@ class TwitterSemEval2015PC(AbsTaskPairClassification):
eval_langs=["eng-Latn"],
main_score="max_ap",
date=None,
domains=None,
task_subtypes=None,
license=None,
annotations_creators=None,
dialect=None,
sample_creation=None,
domains=["Social", "Written"],
task_subtypes=[],
license="not specified",
annotations_creators="human-annotated",
dialect=[],
sample_creation="found",
bibtex_citation="""@inproceedings{xu-etal-2015-semeval,
title = "{S}em{E}val-2015 Task 1: Paraphrase and Semantic Similarity in {T}witter ({PIT})",
author = "Xu, Wei and
Expand Down
12 changes: 6 additions & 6 deletions mteb/tasks/PairClassification/eng/TwitterURLCorpusPC.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,12 @@ class TwitterURLCorpusPC(AbsTaskPairClassification):
eval_langs=["eng-Latn"],
main_score="max_ap",
date=None,
domains=None,
task_subtypes=None,
license=None,
annotations_creators=None,
dialect=None,
sample_creation=None,
domains=["Social", "Written"],
task_subtypes=[],
license="not specified",
annotations_creators="derived",
dialect=[],
sample_creation="found",
bibtex_citation="""@inproceedings{lan-etal-2017-continuously,
title = "A Continuously Growing Dataset of Sentential Paraphrases",
author = "Lan, Wuwei and
Expand Down
8 changes: 4 additions & 4 deletions mteb/tasks/Reranking/eng/AskUbuntuDupQuestions.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,12 @@ class AskUbuntuDupQuestions(AbsTaskReranking):
eval_langs=["eng-Latn"],
main_score="map",
date=None,
domains=None,
domains=["Programming", "Web"],
task_subtypes=None,
license=None,
annotations_creators=None,
dialect=None,
sample_creation=None,
annotations_creators="human-annotated",
dialect=[],
sample_creation="found",
prompt="Retrieve duplicate questions from AskUbuntu forum",
bibtex_citation="""@article{wang-2021-TSDAE,
title = "TSDAE: Using Transformer-based Sequential Denoising Auto-Encoderfor Unsupervised Sentence Embedding Learning",
Expand Down
14 changes: 7 additions & 7 deletions mteb/tasks/Reranking/eng/StackOverflowDupQuestions.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,13 @@ class StackOverflowDupQuestions(AbsTaskReranking):
eval_splits=["test"],
eval_langs=["eng-Latn"],
main_score="map",
date=None,
domains=None,
task_subtypes=None,
license=None,
annotations_creators=None,
dialect=None,
sample_creation=None,
date=("2014-01-21", "2018-01-01"),
domains=["Written", "Blog", "Programming"],
task_subtypes=["Question answering"],
license="cc-by-nc-sa-4.0",
annotations_creators="derived",
dialect=[],
sample_creation="found",
prompt="Retrieve duplicate questions from StackOverflow forum",
bibtex_citation="""@article{Liu2018LinkSOAD,
title={LinkSO: a dataset for learning to retrieve similar question answer pairs on software development forums},
Expand Down
12 changes: 6 additions & 6 deletions mteb/tasks/Retrieval/eng/CQADupstackAndroidRetrieval.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,12 @@ class CQADupstackAndroidRetrieval(AbsTaskRetrieval):
eval_langs=["eng-Latn"],
main_score="ndcg_at_10",
date=None,
domains=None,
task_subtypes=None,
license=None,
annotations_creators=None,
dialect=None,
sample_creation=None,
domains=["Programming", "Web", "Written", "Non-fiction"],
task_subtypes=["Question answering", "Duplicate Detection"],
license="apache-2.0",
annotations_creators="derived",
dialect=[],
sample_creation="found",
bibtex_citation="""@inproceedings{hoogeveen2015,
author = {Hoogeveen, Doris and Verspoor, Karin M. and Baldwin, Timothy},
title = {CQADupStack: A Benchmark Data Set for Community Question-Answering Research},
Expand Down
12 changes: 6 additions & 6 deletions mteb/tasks/Retrieval/eng/CQADupstackEnglishRetrieval.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,12 @@ class CQADupstackEnglishRetrieval(AbsTaskRetrieval):
eval_langs=["eng-Latn"],
main_score="ndcg_at_10",
date=None,
domains=None,
task_subtypes=None,
license=None,
annotations_creators=None,
dialect=None,
sample_creation=None,
domains=["Written"],
task_subtypes=["Question answering", "Duplicate Detection"],
license="apache-2.0",
annotations_creators="derived",
dialect=[],
sample_creation="found",
bibtex_citation="""@inproceedings{hoogeveen2015,
author = {Hoogeveen, Doris and Verspoor, Karin M. and Baldwin, Timothy},
title = {CQADupStack: A Benchmark Data Set for Community Question-Answering Research},
Expand Down
12 changes: 6 additions & 6 deletions mteb/tasks/Retrieval/eng/CQADupstackGamingRetrieval.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,12 @@ class CQADupstackGamingRetrieval(AbsTaskRetrieval):
eval_langs=["eng-Latn"],
main_score="ndcg_at_10",
date=None,
domains=None,
task_subtypes=None,
license=None,
annotations_creators=None,
dialect=None,
sample_creation=None,
domains=["Web", "Written"],
task_subtypes=["Question answering", "Duplicate Detection"],
license="apache-2.0",
annotations_creators="derived",
dialect=[],
sample_creation="found",
bibtex_citation="""@inproceedings{hoogeveen2015,
author = {Hoogeveen, Doris and Verspoor, Karin M. and Baldwin, Timothy},
title = {CQADupStack: A Benchmark Data Set for Community Question-Answering Research},
Expand Down
12 changes: 6 additions & 6 deletions mteb/tasks/Retrieval/eng/CQADupstackGisRetrieval.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,12 @@ class CQADupstackGisRetrieval(AbsTaskRetrieval):
eval_langs=["eng-Latn"],
main_score="ndcg_at_10",
date=None,
domains=None,
task_subtypes=None,
license=None,
annotations_creators=None,
dialect=None,
sample_creation=None,
domains=["Written", "Non-fiction"],
task_subtypes=["Question answering", "Duplicate Detection"],
license="apache-2.0",
annotations_creators="derived",
dialect=[],
sample_creation="found",
bibtex_citation="""@inproceedings{hoogeveen2015,
author = {Hoogeveen, Doris and Verspoor, Karin M. and Baldwin, Timothy},
title = {CQADupStack: A Benchmark Data Set for Community Question-Answering Research},
Expand Down
12 changes: 6 additions & 6 deletions mteb/tasks/Retrieval/eng/CQADupstackMathematicaRetrieval.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,12 @@ class CQADupstackMathematicaRetrieval(AbsTaskRetrieval):
eval_langs=["eng-Latn"],
main_score="ndcg_at_10",
date=None,
domains=None,
task_subtypes=None,
license=None,
annotations_creators=None,
dialect=None,
sample_creation=None,
domains=["Written", "Academic", "Non-fiction"],
task_subtypes=["Question answering", "Duplicate Detection"],
license="apache-2.0",
annotations_creators="derived",
dialect=[],
sample_creation="found",
bibtex_citation="""@inproceedings{hoogeveen2015,
author = {Hoogeveen, Doris and Verspoor, Karin M. and Baldwin, Timothy},
title = {CQADupStack: A Benchmark Data Set for Community Question-Answering Research},
Expand Down
12 changes: 6 additions & 6 deletions mteb/tasks/Retrieval/eng/CQADupstackPhysicsRetrieval.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,12 @@ class CQADupstackPhysicsRetrieval(AbsTaskRetrieval):
eval_langs=["eng-Latn"],
main_score="ndcg_at_10",
date=None,
domains=None,
task_subtypes=None,
license=None,
annotations_creators=None,
dialect=None,
sample_creation=None,
domains=["Written", "Academic", "Non-fiction"],
task_subtypes=["Question answering", "Duplicate Detection"],
license="apache-2.0",
annotations_creators="derived",
dialect=[],
sample_creation="found",
bibtex_citation="""@inproceedings{hoogeveen2015,
author = {Hoogeveen, Doris and Verspoor, Karin M. and Baldwin, Timothy},
title = {CQADupStack: A Benchmark Data Set for Community Question-Answering Research},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ class CQADupstackProgrammersRetrieval(AbsTaskRetrieval):
date=None,
domains=["Programming", "Written", "Non-fiction"],
task_subtypes=[],
license="cc-by-sa-4.0",
license="apache-2.0",
annotations_creators="derived",
dialect=[],
sample_creation="found",
Expand Down
Loading

0 comments on commit 938e90f

Please sign in to comment.