
Commit 907019f

vivekgoe authored and astachowiczhabana committed
[SW-0] Clean deprecated flags usage
Change-Id: I1c2e2460dc2072ba7b311f239441b304694918c8
1 parent 03fa6dd commit 907019f

File tree: 5 files changed, +15 −13 lines changed

docs/source/package_reference/gaudi_config.mdx

+2 −2
@@ -20,8 +20,8 @@ Here is a description of each configuration parameter:
 - `use_fused_adam` enables to decide whether to use the [custom fused implementation of the ADAM optimizer provided by Intel® Gaudi® AI Accelerator](https://docs.habana.ai/en/latest/PyTorch/Model_Optimization_PyTorch/Custom_Ops_PyTorch.html#custom-optimizers).
 - `use_fused_clip_norm` enables to decide whether to use the [custom fused implementation of gradient norm clipping provided by Intel® Gaudi® AI Accelerator](https://docs.habana.ai/en/latest/PyTorch/Model_Optimization_PyTorch/Custom_Ops_PyTorch.html#other-custom-ops).
 - `use_torch_autocast` enables PyTorch autocast; used to define good pre-defined config; users should favor `--bf16` training argument
-- `autocast_bf16_ops` list of operations that should be run with bf16 precision under autocast context; using environment flag LOWER_LIST is a preferred way for operator autocast list override
-- `autocast_fp32_ops` list of operations that should be run with fp32 precision under autocast context; using environment flag FP32_LIST is a preferred way for operator autocast list override
+- `autocast_bf16_ops` list of operations that should be run with bf16 precision under autocast context; using environment flag PT_HPU_AUTOCAST_LOWER_PRECISION_OPS_LIST is a preferred way for operator autocast list override
+- `autocast_fp32_ops` list of operations that should be run with fp32 precision under autocast context; using environment flag PT_HPU_AUTOCAST_FP32_OPS_LIST is a preferred way for operator autocast list override
 
 
You can find examples of Gaudi configurations in the [Habana model repository on the Hugging Face Hub](https://huggingface.co/habana). For instance, [for BERT Large we have](https://huggingface.co/Habana/bert-large-uncased-whole-word-masking/blob/main/gaudi_config.json):
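
For orientation, here is a minimal sketch of loading one of these configurations from Python with optimum-habana; the repository ID is the BERT Large example linked above, and the attribute names mirror the parameters documented in this file (treat the exact API surface as version-dependent):

```python
# Minimal sketch, assuming optimum-habana is installed.
from optimum.habana import GaudiConfig

# Pull the example Gaudi configuration linked in the docs above.
gaudi_config = GaudiConfig.from_pretrained("Habana/bert-large-uncased-whole-word-masking")

# The attributes mirror the documented parameters.
print(gaudi_config.use_fused_adam)       # fused ADAM optimizer on/off
print(gaudi_config.use_fused_clip_norm)  # fused gradient norm clipping on/off
print(gaudi_config.use_torch_autocast)   # Torch autocast on/off
```

As the trainer change below shows, op lists supplied through PT_HPU_AUTOCAST_LOWER_PRECISION_OPS_LIST / PT_HPU_AUTOCAST_FP32_OPS_LIST in the environment take precedence over the `autocast_bf16_ops` / `autocast_fp32_ops` lists in the config.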

examples/language-modeling/README.md

+5 −5
@@ -404,7 +404,7 @@ python3 run_lora_clm.py \
 ```
 - Single-card finetuning of Falcon-40B:
 ```bash
-LOWER_LIST=ops_bf16.txt python3 run_lora_clm.py \
+PT_HPU_AUTOCAST_LOWER_PRECISION_OPS_LIST=ops_bf16.txt python3 run_lora_clm.py \
     --model_name_or_path tiiuae/falcon-40b \
     --dataset_name timdettmers/openassistant-guanaco \
     --bf16 True \
@@ -474,7 +474,7 @@ python ../gaudi_spawn.py \
 
 - Multi-card finetuning of Llama2-7B with FP8:
 ```bash
-LOWER_LIST=ops_bf16.txt python ../gaudi_spawn.py \
+PT_HPU_AUTOCAST_LOWER_PRECISION_OPS_LIST=ops_bf16.txt python ../gaudi_spawn.py \
     --world_size 8 --use_mpi run_lora_clm.py \
     --model_name_or_path meta-llama/Llama-2-7b-hf \
     --dataset_name tatsu-lab/alpaca \
@@ -569,7 +569,7 @@ python ../gaudi_spawn.py \
 
 - Multi-card finetuning of Falcon-40B:
 ```bash
-LOWER_LIST=ops_bf16.txt python3 ../gaudi_spawn.py \
+PT_HPU_AUTOCAST_LOWER_PRECISION_OPS_LIST=ops_bf16.txt python3 ../gaudi_spawn.py \
     --world_size 8 --use_mpi run_lora_clm.py \
     --model_name_or_path tiiuae/falcon-40b \
     --dataset_name timdettmers/openassistant-guanaco \
@@ -647,7 +647,7 @@ python3 ../gaudi_spawn.py --use_deepspeed --world_size 8 run_lora_clm.py \
 - Multi-card finetuning of Llama2-70B with FSDP and LoRA:
 
 ```bash
-LOWER_LIST=ops_bf16.txt PT_HPU_LAZY_MODE=0 \
+PT_HPU_AUTOCAST_LOWER_PRECISION_OPS_LIST=ops_bf16.txt PT_HPU_LAZY_MODE=0 \
 python3 ../gaudi_spawn.py --world_size 8 --use_mpi run_lora_clm.py \
     --model_name_or_path meta-llama/Llama-2-70b-hf \
     --dataset_name tatsu-lab/alpaca \
@@ -690,7 +690,7 @@ python3 ../gaudi_spawn.py --world_size 8 --use_mpi run_lora_clm.py \
 - Falcon-180B example command saves only the LoRA parameters at end
 - For inference we need to merge the pretrained model and LoRA weights
 ```bash
-DEEPSPEED_HPU_ZERO3_SYNC_MARK_STEP_REQUIRED=1 LOWER_LIST=ops_bf16.txt python3 ../gaudi_spawn.py \
+PT_HPU_AUTOCAST_LOWER_PRECISION_OPS_LIST=ops_bf16.txt python3 ../gaudi_spawn.py \
     --world_size 8 --use_deepspeed run_lora_clm.py \
     --model_name_or_path tiiuae/falcon-180B \
     --dataset_name timdettmers/openassistant-guanaco \

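All of the updated commands set the renamed variable inline for a single shell invocation. If you launch these scripts from Python instead, the equivalent override can be passed through the subprocess environment; a minimal sketch based on the Falcon-40B command above (arguments abbreviated):

```python
import os
import subprocess

# Same override as `PT_HPU_AUTOCAST_LOWER_PRECISION_OPS_LIST=ops_bf16.txt python3 run_lora_clm.py ...`,
# applied through the child-process environment instead of the shell.
env = os.environ.copy()
env["PT_HPU_AUTOCAST_LOWER_PRECISION_OPS_LIST"] = "ops_bf16.txt"

subprocess.run(
    ["python3", "run_lora_clm.py", "--model_name_or_path", "tiiuae/falcon-40b", "--bf16", "True"],
    env=env,
    check=True,
)
```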
optimum/habana/transformers/gaudi_configuration.py

+2 −2
@@ -93,5 +93,5 @@ def declare_autocast_bf16_fp32_ops(self):
             autocast_bf16_filename,
             autocast_fp32_filename,
         )
-        os.environ["LOWER_LIST"] = autocast_bf16_filename
-        os.environ["FP32_LIST"] = autocast_fp32_filename
+        os.environ["PT_HPU_AUTOCAST_LOWER_PRECISION_OPS_LIST"] = autocast_bf16_filename
+        os.environ["PT_HPU_AUTOCAST_FP32_OPS_LIST"] = autocast_fp32_filename

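For context, a simplified sketch of what `declare_autocast_bf16_fp32_ops` does after this change: the configured op lists are written to text files whose paths are then exported through the renamed variables. The temp-file handling and one-op-per-line format below are illustrative assumptions; only the environment variable names come from the diff.

```python
import os
import tempfile

def declare_autocast_bf16_fp32_ops(autocast_bf16_ops, autocast_fp32_ops):
    # Write each op list to its own text file (illustrative format: one op per line).
    bf16_file = tempfile.NamedTemporaryFile(mode="w", suffix=".txt", delete=False)
    fp32_file = tempfile.NamedTemporaryFile(mode="w", suffix=".txt", delete=False)
    bf16_file.write("\n".join(autocast_bf16_ops))
    fp32_file.write("\n".join(autocast_fp32_ops))
    bf16_file.close()
    fp32_file.close()
    # Expose the file paths through the renamed flags from this commit.
    os.environ["PT_HPU_AUTOCAST_LOWER_PRECISION_OPS_LIST"] = bf16_file.name
    os.environ["PT_HPU_AUTOCAST_FP32_OPS_LIST"] = fp32_file.name
```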
optimum/habana/transformers/trainer.py

+1 −1
@@ -252,7 +252,7 @@ def __init__(
             "The argument `--bf16` was not given but `use_torch_autocast` is True in the Gaudi configuration so mixed-precision training with Torch Autocast is enabled."
         )
 
-        if self.use_hpu_amp and "LOWER_LIST" not in os.environ:
+        if self.use_hpu_amp and "PT_HPU_AUTOCAST_LOWER_PRECISION_OPS_LIST" not in os.environ:
             self.gaudi_config.declare_autocast_bf16_fp32_ops()
 
         if self.args.use_lazy_mode:

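The guard above encodes a precedence rule: a user-supplied environment override wins, and the trainer only falls back to the Gaudi configuration's op lists when the renamed variable is absent. Restated as a standalone helper, with `gaudi_config` standing in for `self.gaudi_config` in the trainer:

```python
import os

def maybe_declare_autocast_ops(gaudi_config, use_hpu_amp: bool) -> None:
    # An explicit PT_HPU_AUTOCAST_LOWER_PRECISION_OPS_LIST in the environment
    # takes precedence; only fall back to the config's declared op lists otherwise.
    if use_hpu_amp and "PT_HPU_AUTOCAST_LOWER_PRECISION_OPS_LIST" not in os.environ:
        gaudi_config.declare_autocast_bf16_fp32_ops()
```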
tests/test_examples.py

+5 −3
@@ -439,7 +439,7 @@ def test(self):
 
         env_variables = os.environ.copy()
         if "falcon" in model_name:
-            env_variables["LOWER_LIST"] = str(example_script.parent / "ops_bf16.txt")
+            env_variables["PT_HPU_AUTOCAST_LOWER_PRECISION_OPS_LIST"] = str(example_script.parent / "ops_bf16.txt")
         elif "flan" in model_name:
             env_variables["PT_HPU_MAX_COMPOUND_OP_SIZE"] = "512"
         elif "bloom" in model_name:
@@ -450,13 +450,15 @@ def test(self):
             env_variables["DEEPSPEED_HPU_ZERO3_SYNC_MARK_STEP_REQUIRED"] = "1"
         elif fsdp:
             if "llama" in model_name:
-                env_variables["LOWER_LIST"] = str(example_script.parent / "ops_bf16.txt")
+                env_variables["PT_HPU_AUTOCAST_LOWER_PRECISION_OPS_LIST"] = str(
+                    example_script.parent / "ops_bf16.txt"
+                )
             env_variables["PT_HPU_LAZY_MODE"] = "0"
         elif deepspeed and "gpt-neox-20b" in model_name:
             env_variables["LD_PRELOAD"] = ""
 
         if fp8 and "llama" in model_name:
-            env_variables["LOWER_LIST"] = str(example_script.parent / "ops_bf16.txt")
+            env_variables["PT_HPU_AUTOCAST_LOWER_PRECISION_OPS_LIST"] = str(example_script.parent / "ops_bf16.txt")
 
         extra_command_line_arguments = baseline.get("distribution").get(distribution).get("extra_arguments", [])

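The test wires the renamed variable per model family before spawning each example. A condensed, hypothetical restatement of that selection logic (the function name and signature are illustrative; the conditions mirror the diff):

```python
import os
from pathlib import Path

def build_test_env(model_name: str, example_dir: Path, fsdp: bool = False, fp8: bool = False) -> dict:
    # Copy the current environment, then apply the per-model overrides from the test.
    env = os.environ.copy()
    ops_file = str(example_dir / "ops_bf16.txt")
    if "falcon" in model_name or (fsdp and "llama" in model_name) or (fp8 and "llama" in model_name):
        env["PT_HPU_AUTOCAST_LOWER_PRECISION_OPS_LIST"] = ops_file
    if fsdp and "llama" in model_name:
        env["PT_HPU_LAZY_MODE"] = "0"
    return env
```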