From ac61fbde379385a295a7631a238bd3a7a50e2fb6 Mon Sep 17 00:00:00 2001
From: Alexey Belyakov
Date: Mon, 4 Nov 2024 13:36:28 +0000
Subject: [PATCH 1/2] update lm_eval version

---
 .../text-generation/requirements_lm_eval.txt |  3 +--
 examples/text-generation/run_lm_eval.py      | 25 +++++--------------
 2 files changed, 7 insertions(+), 21 deletions(-)

diff --git a/examples/text-generation/requirements_lm_eval.txt b/examples/text-generation/requirements_lm_eval.txt
index e632dc1236..272b9365db 100644
--- a/examples/text-generation/requirements_lm_eval.txt
+++ b/examples/text-generation/requirements_lm_eval.txt
@@ -1,2 +1 @@
-https://github.com/EleutherAI/lm-evaluation-harness/archive/0bf683b4e6a9df359b3156ba9ba8d62bdd47e0c0.zip
-datasets==2.21.0
+https://github.com/EleutherAI/lm-evaluation-harness/archive/c1d8795da7610d507cb191c2769c5e7bf1060a35.zip
diff --git a/examples/text-generation/run_lm_eval.py b/examples/text-generation/run_lm_eval.py
index 3299cadcbe..6042e21640 100644
--- a/examples/text-generation/run_lm_eval.py
+++ b/examples/text-generation/run_lm_eval.py
@@ -29,6 +29,7 @@
 import psutil
 import torch
 import torch.nn.functional as F
+from lm_eval.models.huggingface import HFLM
 
 # Local imports
 from run_generation import setup_parser
@@ -91,17 +92,15 @@ def setup_lm_eval_parser():
     return args
 
 
-class HabanaModelAdapter(lm_eval.base.BaseLM):
+class HabanaModelAdapter(HFLM):
     def __init__(self, tokenizer, model, args, options):
-        super().__init__()
+        super().__init__(pretrained=model, tokenizer=tokenizer, batch_size=args.batch_size)
         self.tokenizer = tokenizer
-        self.model = model
-        self._batch_size = args.batch_size
         self.buckets = sorted(args.buckets)
         self.options = options
         self._device = args.device
         self.model_inputs = {"use_cache": self.options.use_cache}
-        if self.model.config.model_type in [
+        if self._model.config.model_type in [
             "llama",
             "mistral",
             "falcon",
@@ -112,11 +111,7 @@ def __init__(self, tokenizer, model, args, options):
             "starcoder2",
             "gemma",
         ]:
-            self.model_inputs.update(
-                {
-                    "reuse_cache": self.options.reuse_cache,
-                }
-            )
+            self.model_inputs.update({"reuse_cache": self.options.reuse_cache})
         if self.model.config.model_type in ["llama", "mistral", "qwen2", "falcon", "starcoder2", "gemma"]:
             if self.model.config.model_type != "falcon":
                 self.model_inputs.update(
@@ -136,7 +131,7 @@ def __init__(self, tokenizer, model, args, options):
 
     def warm_up(self):
         for bucket_size in reversed(self.buckets):
-            inps = torch.ones((self._batch_size, bucket_size), dtype=torch.int64)
+            inps = torch.ones((self.batch_size, bucket_size), dtype=torch.int64)
             self._model_call(inps)
         pass
 
@@ -148,14 +143,6 @@ def eot_token_id(self):
     def max_length(self):
         return self.buckets[-1]
 
-    @property
-    def max_gen_toks(self):
-        raise NotImplementedError()
-
-    @property
-    def batch_size(self):
-        return self._batch_size
-
     @property
     def device(self):
         # We need to do padding ourselves, otherwise we'll end up with recompilations

From 6d49307b705ac01b8f5cf10efec29105e92a653e Mon Sep 17 00:00:00 2001
From: regisss <15324346+regisss@users.noreply.github.com>
Date: Tue, 3 Dec 2024 10:44:19 +0000
Subject: [PATCH 2/2] Add instruction to README

---
 examples/text-generation/README.md | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/examples/text-generation/README.md b/examples/text-generation/README.md
index 9e7d728205..6da9bc8470 100755
--- a/examples/text-generation/README.md
+++ b/examples/text-generation/README.md
@@ -26,6 +26,11 @@ First, you should install the requirements:
 pip install -r requirements.txt
 ```
 
+For `run_lm_eval.py`:
+```bash
+pip install -r requirements_lm_eval.txt
+```
+
 Then, if you plan to use [DeepSpeed-inference](https://docs.habana.ai/en/latest/PyTorch/DeepSpeed/Inference_Using_DeepSpeed.html) (e.g. to use BLOOM/BLOOMZ), you should install DeepSpeed as follows:
 ```bash
 pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.18.0
@@ -258,7 +263,7 @@
 
 ### Using Beam Search
 
-> Restriction: When `reuse_cache` is not applied, currently beam search can only work for the models with model type of `llama` or `qwen2` since it requires `_reorder_cache` implemented in the modeling. The group beam search and constrained beam search is not supported by optimum-habana yet. 
+> Restriction: When `reuse_cache` is not applied, currently beam search can only work for the models with model type of `llama` or `qwen2` since it requires `_reorder_cache` implemented in the modeling. The group beam search and constrained beam search is not supported by optimum-habana yet.
 
 Here is an example:
 ```bash
@@ -652,7 +657,7 @@
 
 and by adding the argument `--load_quantized_model_with_autogptq`.
 
 ***Note:*** Setting the above environment variables improves performance. These variables will be removed in future releases.
- 
+
 Here is an example to run a quantized model :
 ```bash
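
For context on the `run_lm_eval.py` change above: newer lm-evaluation-harness releases removed `lm_eval.base.BaseLM`, so the adapter now subclasses `lm_eval.models.huggingface.HFLM` and hands it the already-built model, tokenizer, and batch size. Below is a minimal sketch of that pattern, assuming an lm-eval 0.4.x install; the `ToyAdapter` name and its constructor arguments are illustrative and not part of the patch — only `HFLM` and the `pretrained`/`tokenizer`/`batch_size` keywords come from the diff.

```python
# Sketch of the lm-eval >= 0.4 adapter pattern this patch adopts (illustrative, not from the repo).
from lm_eval.models.huggingface import HFLM


class ToyAdapter(HFLM):
    """Illustrative subclass; the patch's real adapter is HabanaModelAdapter."""

    def __init__(self, tokenizer, model, batch_size=1):
        # HFLM accepts an already-instantiated model via `pretrained` and manages the
        # tokenizer and batch size itself, so a subclass no longer needs to store
        # `self.model`/`self._batch_size` or define `batch_size`/`max_gen_toks`.
        super().__init__(pretrained=model, tokenizer=tokenizer, batch_size=batch_size)
```

Passing the pre-built model through `pretrained=` keeps model construction in the example script while letting the harness drive tokenization and batching, which is why the old `batch_size` and `max_gen_toks` properties could be dropped.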