huggingface · regisss · Dec 3, 2024 · Nov 4, 2024 · Dec 3, 2024 · Dec 3, 2024
@@ -1,2 +1 @@
-https://github.com/EleutherAI/lm-evaluation-harness/archive/0bf683b4e6a9df359b3156ba9ba8d62bdd47e0c0.zip
-datasets==2.21.0
+https://github.com/EleutherAI/lm-evaluation-harness/archive/c1d8795da7610d507cb191c2769c5e7bf1060a35.zip
@@ -29,6 +29,7 @@
 import psutil
 import torch
 import torch.nn.functional as F
+from lm_eval.models.huggingface import HFLM
 
 # Local imports
 from run_generation import setup_parser
@@ -91,17 +92,15 @@ def setup_lm_eval_parser():
     return args
 
 
-class HabanaModelAdapter(lm_eval.base.BaseLM):
+class HabanaModelAdapter(HFLM):
     def __init__(self, tokenizer, model, args, options):
-        super().__init__()
+        super().__init__(pretrained=model, tokenizer=tokenizer, batch_size=args.batch_size)
         self.tokenizer = tokenizer
-        self.model = model
-        self._batch_size = args.batch_size
         self.buckets = sorted(args.buckets)
         self.options = options
         self._device = args.device
         self.model_inputs = {"use_cache": self.options.use_cache}
-        if self.model.config.model_type in [
+        if self._model.config.model_type in [
             "llama",
             "mistral",
             "falcon",
@@ -112,11 +111,7 @@ def __init__(self, tokenizer, model, args, options):
             "starcoder2",
             "gemma",
         ]:
-            self.model_inputs.update(
-                {
-                    "reuse_cache": self.options.reuse_cache,
-                }
-            )
+            self.model_inputs.update({"reuse_cache": self.options.reuse_cache})
         if self.model.config.model_type in ["llama", "mistral", "qwen2", "falcon", "starcoder2", "gemma"]:
             if self.model.config.model_type != "falcon":
                 self.model_inputs.update(
@@ -136,7 +131,7 @@ def __init__(self, tokenizer, model, args, options):
 
     def warm_up(self):
         for bucket_size in reversed(self.buckets):
-            inps = torch.ones((self._batch_size, bucket_size), dtype=torch.int64)
+            inps = torch.ones((self.batch_size, bucket_size), dtype=torch.int64)
             self._model_call(inps)
             pass
 
@@ -148,14 +143,6 @@ def eot_token_id(self):
     def max_length(self):
         return self.buckets[-1]
 
-    @property
-    def max_gen_toks(self):
-        raise NotImplementedError()
-
-    @property
-    def batch_size(self):
-        return self._batch_size
-
     @property
     def device(self):
         # We need to do padding ourselves, otherwise we'll end up with recompilations