Skip to content

Commit

Permalink
update stella/jasper metainfo (#1896)
Browse files Browse the repository at this point in the history
update stella meta
  • Loading branch information
Samoed authored Jan 29, 2025
1 parent 0861254 commit 976bdd5
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 8 deletions.
17 changes: 13 additions & 4 deletions mteb/models/jasper_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,8 +90,17 @@ def encode(
use_instructions=True,
adapted_from=None,
superseded_by=None,
training_datasets=nvidia_training_datasets, # "In jasper model the teacher model is nvidia/NV-Embed-v2", source https://huggingface.co/infgrad/jasper_en_vision_language_v1
# "non_mteb": ["BAAI/Infinity-MM", "HuggingFaceFW/fineweb-edu"],
public_training_code=None,
public_training_data=None,
training_datasets={
# stage 1, 2, 3
# "In jasper model the teacher model is nvidia/NV-Embed-v2", source https://huggingface.co/infgrad/jasper_en_vision_language_v1
**nvidia_training_datasets,
# fineweb-edu
# https://huggingface.co/datasets/sentence-transformers/embedding-training-data
# stage 4
# BAAI/Infinity-MM
},
# training logs: https://api.wandb.ai/links/dunnzhang0/z8jqoqpb
# more code: https://huggingface.co/NovaSearch/jasper_en_vision_language_v1/commit/da9b77d56c23d9398fa8f93af449102784f74e1d
public_training_code="https://github.com/NovaSearch-Team/RAG-Retrieval/blob/c40f4638b705eb77d88305d2056901ed550f9f4b/rag_retrieval/train/embedding/README.md",
public_training_data="https://huggingface.co/datasets/infgrad/jasper_text_distill_dataset",
)
6 changes: 2 additions & 4 deletions mteb/models/stella_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,7 @@
framework=["Sentence Transformers", "PyTorch", "GritLM"],
reference="https://huggingface.co/dunzhang/stella_en_400M_v5",
training_datasets=None,
# will be at https://github.com/NLPJCL/RAG-Retrieval
public_training_code=None,
public_training_code="https://github.com/NovaSearch-Team/RAG-Retrieval/blob/c40f4638b705eb77d88305d2056901ed550f9f4b/rag_retrieval/train/embedding/README.md",
public_training_data=None,
)

Expand All @@ -56,9 +55,8 @@
similarity_fn_name="cosine",
framework=["Sentence Transformers", "PyTorch", "GritLM"],
reference="https://huggingface.co/dunzhang/stella_en_1.5B_v5",
# will be at https://github.com/NLPJCL/RAG-Retrieval
training_datasets=None,
public_training_code=None,
public_training_code="https://github.com/NovaSearch-Team/RAG-Retrieval/blob/c40f4638b705eb77d88305d2056901ed550f9f4b/rag_retrieval/train/embedding/README.md",
public_training_data=None,
)

Expand Down

0 comments on commit 976bdd5

Please sign in to comment.