Commit: Update lm_eval version (#1473)
Co-authored-by: regisss <15324346+regisss@users.noreply.github.com>
alexey-belyakov and regisss authored Dec 3, 2024
1 parent e883691 commit bfc9233
Showing 3 changed files with 13 additions and 18 deletions.
examples/text-generation/README.md (9 changes: 7 additions & 2 deletions)
@@ -26,6 +26,11 @@ First, you should install the requirements:
 pip install -r requirements.txt
 ```
 
+For `run_lm_eval.py`:
+```bash
+pip install -r requirements_lm_eval.txt
+```
+
 Then, if you plan to use [DeepSpeed-inference](https://docs.habana.ai/en/latest/PyTorch/DeepSpeed/Inference_Using_DeepSpeed.html) (e.g. to use BLOOM/BLOOMZ), you should install DeepSpeed as follows:
 ```bash
 pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.18.0
@@ -258,7 +263,7 @@ While `--bucket_size` works for any model without model file changes, an even mo
 
 ### Using Beam Search
 
-> Restriction: When `reuse_cache` is not applied, currently beam search can only work for the models with model type of `llama` or `qwen2` since it requires `_reorder_cache` implemented in the modeling. The group beam search and constrained beam search is not supported by optimum-habana yet.
+> Restriction: When `reuse_cache` is not applied, beam search currently works only for models whose model type is `llama` or `qwen2`, since it requires `_reorder_cache` to be implemented in the modeling code. Group beam search and constrained beam search are not supported by optimum-habana yet.
 
 Here is an example:
 ```bash
@@ -652,7 +657,7 @@ and by adding the argument `--load_quantized_model_with_autogptq`.
 
 ***Note:***
 Setting the above environment variables improves performance. These variables will be removed in future releases.
-
+
 
 Here is an example to run a quantized model <quantized_gptq_model>:
 ```bash
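As context for the new `requirements_lm_eval.txt` step, here is a minimal sketch of how the evaluation script is typically launched from this directory. The model name and output file are placeholders, and the flags are assumed from the surrounding text-generation examples rather than introduced by this commit:

```bash
cd examples/text-generation
pip install -r requirements_lm_eval.txt

# Placeholder model; adjust the generation flags to your setup.
python run_lm_eval.py \
  --model_name_or_path meta-llama/Llama-2-7b-hf \
  --use_hpu_graphs \
  --use_kv_cache \
  --batch_size 8 \
  -o results.json
```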
examples/text-generation/requirements_lm_eval.txt (3 changes: 1 addition & 2 deletions)
@@ -1,2 +1 @@
-https://github.com/EleutherAI/lm-evaluation-harness/archive/0bf683b4e6a9df359b3156ba9ba8d62bdd47e0c0.zip
-datasets==2.21.0
+https://github.com/EleutherAI/lm-evaluation-harness/archive/c1d8795da7610d507cb191c2769c5e7bf1060a35.zip
examples/text-generation/run_lm_eval.py (19 changes: 5 additions & 14 deletions)
@@ -29,6 +29,7 @@
 import psutil
 import torch
 import torch.nn.functional as F
+from lm_eval.models.huggingface import HFLM
 
 # Local imports
 from run_generation import setup_parser
@@ -91,17 +92,15 @@ def setup_lm_eval_parser():
     return args
 
 
-class HabanaModelAdapter(lm_eval.base.BaseLM):
+class HabanaModelAdapter(HFLM):
     def __init__(self, tokenizer, model, args, options):
-        super().__init__()
+        super().__init__(pretrained=model, tokenizer=tokenizer, batch_size=args.batch_size)
         self.tokenizer = tokenizer
         self.model = model
-        self._batch_size = args.batch_size
         self.buckets = sorted(args.buckets)
         self.options = options
-        self._device = args.device
         self.model_inputs = {"use_cache": self.options.use_cache}
-        if self.model.config.model_type in [
+        if self._model.config.model_type in [
             "llama",
             "mistral",
             "falcon",
@@ -137,7 +136,7 @@ def __init__(self, tokenizer, model, args, options):
 
     def warm_up(self):
         for bucket_size in reversed(self.buckets):
-            inps = torch.ones((self._batch_size, bucket_size), dtype=torch.int64)
+            inps = torch.ones((self.batch_size, bucket_size), dtype=torch.int64)
             self._model_call(inps)
             pass
 
@@ -149,14 +148,6 @@ def eot_token_id(self):
     def max_length(self):
         return self.buckets[-1]
 
-    @property
-    def max_gen_toks(self):
-        raise NotImplementedError()
-
-    @property
-    def batch_size(self):
-        return self._batch_size
-
     @property
     def device(self):
         # We need to do padding ourselves, otherwise we'll end up with recompilations
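To read the refactor in one place: the adapter now subclasses `lm_eval`'s `HFLM` instead of the removed `lm_eval.base.BaseLM`, and the parent constructor takes over bookkeeping previously done by hand (which is why the `max_gen_toks` and `batch_size` properties could be dropped). Below is a sketch of the updated `__init__` and `warm_up` assembled from the hunks above; methods elided by the diff are omitted, so this is illustrative rather than the full file.

```python
import torch
from lm_eval.models.huggingface import HFLM


class HabanaModelAdapter(HFLM):
    def __init__(self, tokenizer, model, args, options):
        # HFLM now owns the model, tokenizer, and batch size, so they are
        # handed to the parent constructor instead of stored manually.
        super().__init__(pretrained=model, tokenizer=tokenizer, batch_size=args.batch_size)
        self.tokenizer = tokenizer
        self.model = model
        self.buckets = sorted(args.buckets)
        self.options = options
        self.model_inputs = {"use_cache": self.options.use_cache}

    def warm_up(self):
        # Run one forward pass per bucket size (largest first) so that all
        # graph shapes are compiled before measurement begins.
        for bucket_size in reversed(self.buckets):
            inps = torch.ones((self.batch_size, bucket_size), dtype=torch.int64)
            self._model_call(inps)
```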
