From ac61fbde379385a295a7631a238bd3a7a50e2fb6 Mon Sep 17 00:00:00 2001
From: Alexey Belyakov
Date: Mon, 4 Nov 2024 13:36:28 +0000
Subject: [PATCH 1/2] update lm_eval version

---
 .../text-generation/requirements_lm_eval.txt |  3 +--
 examples/text-generation/run_lm_eval.py      | 25 +++++--------------
 2 files changed, 7 insertions(+), 21 deletions(-)

diff --git a/examples/text-generation/requirements_lm_eval.txt b/examples/text-generation/requirements_lm_eval.txt
index e632dc1236..272b9365db 100644
--- a/examples/text-generation/requirements_lm_eval.txt
+++ b/examples/text-generation/requirements_lm_eval.txt
@@ -1,2 +1 @@
-https://github.com/EleutherAI/lm-evaluation-harness/archive/0bf683b4e6a9df359b3156ba9ba8d62bdd47e0c0.zip
-datasets==2.21.0
+https://github.com/EleutherAI/lm-evaluation-harness/archive/c1d8795da7610d507cb191c2769c5e7bf1060a35.zip
diff --git a/examples/text-generation/run_lm_eval.py b/examples/text-generation/run_lm_eval.py
index 3299cadcbe..6042e21640 100644
--- a/examples/text-generation/run_lm_eval.py
+++ b/examples/text-generation/run_lm_eval.py
@@ -29,6 +29,7 @@
 import psutil
 import torch
 import torch.nn.functional as F
+from lm_eval.models.huggingface import HFLM
 
 # Local imports
 from run_generation import setup_parser
@@ -91,17 +92,15 @@ def setup_lm_eval_parser():
     return args
 
 
-class HabanaModelAdapter(lm_eval.base.BaseLM):
+class HabanaModelAdapter(HFLM):
     def __init__(self, tokenizer, model, args, options):
-        super().__init__()
+        super().__init__(pretrained=model, tokenizer=tokenizer, batch_size=args.batch_size)
         self.tokenizer = tokenizer
-        self.model = model
-        self._batch_size = args.batch_size
         self.buckets = sorted(args.buckets)
         self.options = options
         self._device = args.device
         self.model_inputs = {"use_cache": self.options.use_cache}
-        if self.model.config.model_type in [
+        if self._model.config.model_type in [
             "llama",
             "mistral",
             "falcon",
@@ -112,11 +111,7 @@ def __init__(self, tokenizer, model, args, options):
             "starcoder2",
             "gemma",
         ]:
-            self.model_inputs.update(
-                {
-                    "reuse_cache": self.options.reuse_cache,
-                }
-            )
+            self.model_inputs.update({"reuse_cache": self.options.reuse_cache})
         if self.model.config.model_type in ["llama", "mistral", "qwen2", "falcon", "starcoder2", "gemma"]:
             if self.model.config.model_type != "falcon":
                 self.model_inputs.update(
@@ -136,7 +131,7 @@ def __init__(self, tokenizer, model, args, options):
 
     def warm_up(self):
         for bucket_size in reversed(self.buckets):
-            inps = torch.ones((self._batch_size, bucket_size), dtype=torch.int64)
+            inps = torch.ones((self.batch_size, bucket_size), dtype=torch.int64)
             self._model_call(inps)
         pass
 
@@ -148,14 +143,6 @@ def eot_token_id(self):
     def max_length(self):
         return self.buckets[-1]
 
-    @property
-    def max_gen_toks(self):
-        raise NotImplementedError()
-
-    @property
-    def batch_size(self):
-        return self._batch_size
-
     @property
     def device(self):
         # We need to do padding ourselves, otherwise we'll end up with recompilations

From 6d49307b705ac01b8f5cf10efec29105e92a653e Mon Sep 17 00:00:00 2001
From: regisss <15324346+regisss@users.noreply.github.com>
Date: Tue, 3 Dec 2024 10:44:19 +0000
Subject: [PATCH 2/2] Add instruction to README

---
 examples/text-generation/README.md | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/examples/text-generation/README.md b/examples/text-generation/README.md
index 9e7d728205..6da9bc8470 100755
--- a/examples/text-generation/README.md
+++ b/examples/text-generation/README.md
@@ -26,6 +26,11 @@ First, you should install the requirements:
 pip install -r requirements.txt
 ```
 
+For `run_lm_eval.py`:
+```bash
+pip install -r requirements_lm_eval.txt
+```
+
 Then, if you plan to use [DeepSpeed-inference](https://docs.habana.ai/en/latest/PyTorch/DeepSpeed/Inference_Using_DeepSpeed.html) (e.g. to use BLOOM/BLOOMZ), you should install DeepSpeed as follows:
 ```bash
 pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.18.0
@@ -258,7 +263,7 @@
 
 ### Using Beam Search
 
-> Restriction: When `reuse_cache` is not applied, currently beam search can only work for the models with model type of `llama` or `qwen2` since it requires `_reorder_cache` implemented in the modeling. The group beam search and constrained beam search is not supported by optimum-habana yet. 
+> Restriction: When `reuse_cache` is not applied, currently beam search can only work for the models with model type of `llama` or `qwen2` since it requires `_reorder_cache` implemented in the modeling. The group beam search and constrained beam search is not supported by optimum-habana yet.
 
 Here is an example:
 ```bash
@@ -652,7 +657,7 @@
 
 and by adding the argument `--load_quantized_model_with_autogptq`.
 
 ***Note:*** Setting the above environment variables improves performance. These variables will be removed in future releases.
- 
+
 Here is an example to run a quantized model :
 ```bash
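
For context on the `run_lm_eval.py` change above: newer lm-evaluation-harness releases removed `lm_eval.base.BaseLM`, so the adapter now subclasses `lm_eval.models.huggingface.HFLM` and hands it the already-built model, tokenizer, and batch size. Below is a minimal sketch of that pattern, assuming an lm-eval 0.4.x install; the `ToyAdapter` name and its constructor arguments are illustrative and not part of the patch — only `HFLM` and the `pretrained`/`tokenizer`/`batch_size` keywords come from the diff.

```python
# Sketch of the lm-eval >= 0.4 adapter pattern this patch adopts (illustrative, not from the repo).
from lm_eval.models.huggingface import HFLM


class ToyAdapter(HFLM):
    """Illustrative subclass; the patch's real adapter is HabanaModelAdapter."""

    def __init__(self, tokenizer, model, batch_size=1):
        # HFLM accepts an already-instantiated model via `pretrained` and manages the
        # tokenizer and batch size itself, so a subclass no longer needs to store
        # `self.model`/`self._batch_size` or define `batch_size`/`max_gen_toks`.
        super().__init__(pretrained=model, tokenizer=tokenizer, batch_size=batch_size)
```

Passing the pre-built model through `pretrained=` keeps model construction in the example script while letting the harness drive tokenization and batching, which is why the old `batch_size` and `max_gen_toks` properties could be dropped.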