Skip to content

Commit

Permalink
Merge pull request #125 from allenai/OPT175B
Browse files Browse the repository at this point in the history
OPT-175B
  • Loading branch information
dirkgr authored Mar 10, 2023
2 parents 753f60a + db87de1 commit bd5ccfa
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 1 deletion.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## Unreleased

### Added

- Support for OPT-175B (AI2 only)

### Fixed

- Fixed the way we compute SQuAD metrics.
Expand Down
4 changes: 4 additions & 0 deletions catwalk/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,10 @@ def _shorten_hf_name(hf_name: str) -> str:
MODELS[f"metaicl::{name}"] = MetaICLModel(hf_name)
MODELS[f"promptsource::{name}"] = PromptsourceDecoderOnlyRCModel(hf_name)

# Register OPT-175B for rank-classification evaluation (AI2-internal only:
# the weights live on an NFS mount, not on the HuggingFace hub).
# The model directory ships no tokenizer, so we borrow the tokenizer from
# facebook/opt-66b — all OPT sizes share the same GPT-2-style BPE vocabulary.
# NOTE(review): assumes the opt-66b tokenizer is vocabulary-compatible with
# the converted 175B checkpoint — confirm against the conversion script.
MODELS["rc::opt-175b"] = DecoderOnlyRCModel(
    "/net/nfs.cirrascale/allennlp/opt/opt-175b-huggingface",
    pretrained_tokenizer_name_or_path="facebook/opt-66b")


def short_name_for_model_object(model: Model) -> Optional[str]:
for model_name, model_object in MODELS.items():
Expand Down
10 changes: 9 additions & 1 deletion catwalk/models/rank_classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ def __init__(
self,
pretrained_model_name_or_path: str,
*,
pretrained_tokenizer_name_or_path: Optional[str] = None,
likelihood_averaging: str = 'token',
**model_kwargs
):
Expand All @@ -41,13 +42,20 @@ def __init__(
"""
assert likelihood_averaging in {'char', 'token'}
self.pretrained_model_name_or_path = pretrained_model_name_or_path
if pretrained_tokenizer_name_or_path is None:
pretrained_tokenizer_name_or_path = pretrained_model_name_or_path
self.pretrained_tokenizer_name_or_path = pretrained_tokenizer_name_or_path

self.likelihood_averaging = likelihood_averaging
self.model_kwargs = model_kwargs

@classmethod
def _make_model(cls, pretrained_model_name_or_path: str, *, make_copy: bool = False, **kwargs) -> _Model:
    # Abstract factory hook: concrete subclasses must build and return the
    # underlying HF model for `pretrained_model_name_or_path`.  `make_copy`
    # and `**kwargs` are forwarded by callers; their interpretation is left
    # to the subclass.  This base implementation always raises.
    raise NotImplementedError

def _make_tokenizer(self) -> AutoTokenizer:
    """Build the tokenizer for this model.

    Uses ``self.pretrained_tokenizer_name_or_path`` (which, per ``__init__``,
    falls back to the model path when no explicit tokenizer was given), going
    through ``cached_transformers`` so repeated calls reuse the same object.
    """
    tokenizer_source = self.pretrained_tokenizer_name_or_path
    return cached_transformers.get_tokenizer(AutoTokenizer, tokenizer_source)

def predict( # type: ignore
self,
task: Task,
Expand All @@ -62,7 +70,7 @@ def predict( # type: ignore
self.pretrained_model_name_or_path,
device_map="auto" if torch.cuda.device_count() > 0 else None,
**self.model_kwargs).eval()
tokenizer = cached_transformers.get_tokenizer(AutoTokenizer, self.pretrained_model_name_or_path)
tokenizer = self._make_tokenizer()

for instance_chunk in more_itertools.chunked(instances, max_instances_in_memory):
yield from self.predict_chunk(
Expand Down

0 comments on commit bd5ccfa

Please sign in to comment.