Deprecates AdamW and adds --optim #14744

Merged · 55 commits · Jan 13, 2022
Changes from 1 commit
Commits (55)
32392af
Add AdamW deprecation warning
manuelciosici Dec 10, 2021
c637f37
Add --optim to Trainer
manuelciosici Dec 10, 2021
b4d0b6d
Update src/transformers/optimization.py
manuelciosici Dec 13, 2021
bcc2408
Update src/transformers/optimization.py
manuelciosici Dec 13, 2021
460eff4
Update src/transformers/optimization.py
manuelciosici Dec 13, 2021
6dc78a6
Update src/transformers/optimization.py
manuelciosici Dec 13, 2021
68dd581
Update src/transformers/training_args.py
manuelciosici Dec 13, 2021
9560350
Update src/transformers/training_args.py
manuelciosici Dec 13, 2021
01f1c7b
Update src/transformers/training_args.py
stas00 Dec 27, 2021
0c79a5f
Merge remote-tracking branch 'origin/master' into deprecate_adamw
stas00 Dec 27, 2021
7ec094f
fix style
stas00 Dec 27, 2021
1c9cccf
fix
stas00 Dec 29, 2021
9807d35
Regroup adamws together
manuelciosici Dec 30, 2021
7a063ab
Change --adafactor to --optim adafactor
manuelciosici Dec 30, 2021
d599a38
Use Enum for optimizer values
manuelciosici Dec 30, 2021
1f9210c
fixup! Change --adafactor to --optim adafactor
manuelciosici Dec 30, 2021
a80b39e
fixup! Change --adafactor to --optim adafactor
manuelciosici Dec 30, 2021
fdf40b2
fixup! Change --adafactor to --optim adafactor
manuelciosici Dec 30, 2021
d5dc69a
Merge branch 'master' into deprecate_adamw
manuelciosici Dec 30, 2021
0acba0c
fixup! Use Enum for optimizer values
manuelciosici Dec 30, 2021
2b7d9dd
Improved documentation for --adafactor
manuelciosici Dec 31, 2021
7c3139a
Add mention of no_deprecation_warning
manuelciosici Dec 31, 2021
234f7d1
Rename OptimizerOptions to OptimizerNames
manuelciosici Dec 31, 2021
1786d42
Use choices for --optim
manuelciosici Dec 31, 2021
210ed37
Move optimizer selection code to a function and add a unit test
manuelciosici Dec 31, 2021
7e62da9
Change optimizer names
manuelciosici Dec 31, 2021
0e7f955
Rename method
manuelciosici Jan 1, 2022
12a9e37
Rename method
manuelciosici Jan 1, 2022
c5853b0
Remove TODO comment
manuelciosici Jan 1, 2022
d59aa52
Rename variable
manuelciosici Jan 1, 2022
e7ffd71
Rename variable
manuelciosici Jan 1, 2022
b64fc03
Rename function
manuelciosici Jan 1, 2022
c5b5443
Rename variable
manuelciosici Jan 1, 2022
91aff78
Parameterize the tests for supported optimizers
manuelciosici Jan 1, 2022
f3505db
Refactor
manuelciosici Jan 1, 2022
91c35f2
Attempt to make tests pass on CircleCI
manuelciosici Jan 1, 2022
bcd8a0d
Add a test with apex
manuelciosici Jan 2, 2022
f8cb39c
rework to add apex to parameterized; add actual train test
stas00 Jan 2, 2022
98f0f2f
fix import when torch is not available
stas00 Jan 2, 2022
eba41bd
fix optim_test_params when torch is not available
stas00 Jan 2, 2022
aaee305
fix optim_test_params when torch is not available
stas00 Jan 2, 2022
071198c
re-org
stas00 Jan 2, 2022
182dac8
small re-org
stas00 Jan 2, 2022
2b46361
fix test_fused_adam_no_apex
stas00 Jan 2, 2022
470a1d7
Update src/transformers/training_args.py
manuelciosici Jan 12, 2022
cb85474
Update src/transformers/training_args.py
manuelciosici Jan 12, 2022
b2675f8
Update src/transformers/training_args.py
manuelciosici Jan 12, 2022
1e8acec
Remove .value from OptimizerNames
manuelciosici Jan 12, 2022
b32a194
Rename optimizer strings s|--adam_|--adamw_|
manuelciosici Jan 12, 2022
b839e80
Also rename Enum options
manuelciosici Jan 12, 2022
e73249c
small fix
stas00 Jan 12, 2022
7ac8dc0
Fix instantiation of OptimizerNames. Remove redundant test
manuelciosici Jan 12, 2022
a2363cd
Use ExplicitEnum instead of Enum
manuelciosici Jan 12, 2022
ea02877
Add unit test with string optimizer
manuelciosici Jan 12, 2022
ec92011
Change optimizer default to string value
manuelciosici Jan 13, 2022
Move optimizer selection code to a function and add a unit test
manuelciosici committed Dec 31, 2021
commit 210ed37fe9408183e378b5c47798bfb2b5482de5
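For context, the user-facing change of this PR is that the optimizer is selected through the --optim training argument instead of dedicated flags such as --adafactor. A rough sketch of how that looks, assuming the argument value from the commit "Change --adafactor to --optim adafactor" above (the exact accepted strings and the default change in later commits):

import sys

from transformers import HfArgumentParser, TrainingArguments

parser = HfArgumentParser(TrainingArguments)
# "adafactor" is taken from the commit titles above; other accepted values
# correspond to the OptimizerNames members used in the diff below.
(training_args,) = parser.parse_args_into_dataclasses(
    args=["--output_dir", "out", "--optim", "adafactor"]
)
print(training_args.optim, file=sys.stderr)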
77 changes: 43 additions & 34 deletions src/transformers/trainer.py
@@ -820,40 +820,7 @@ def create_optimizer(self):
},
]

optimizer_kwargs = {"lr": self.args.learning_rate}

adam_kwargs = {
"betas": (self.args.adam_beta1, self.args.adam_beta2),
"eps": self.args.adam_epsilon,
}

# TODO the following code is a good candidate for PEP 622 once Python 3.10 becomes the
# minimum required version. See, https://www.python.org/dev/peps/pep-0622/
if self.args.optim == OptimizerNames.ADAFACTOR.value:
optimizer_cls = Adafactor
optimizer_kwargs.update({"scale_parameter": False, "relative_step": False})
elif self.args.optim == OptimizerNames.ADAMW_HF.value:
from .optimization import AdamW

optimizer_cls = AdamW
optimizer_kwargs.update(adam_kwargs)
elif self.args.optim == OptimizerNames.ADAMW_TORCH.value:
from torch.optim import AdamW

optimizer_cls = AdamW
optimizer_kwargs.update(adam_kwargs)
elif self.args.optim == OptimizerNames.APEX_FUSED_ADAM.value:
try:
from apex.optimizers import FusedAdam

optimizer_cls = FusedAdam
optimizer_kwargs.update(adam_kwargs)
except ImportError:
raise ValueError(
"Trainer attempted to instantiate apex.optimizers.FusedAdam but apex is not installed!"
)
else:
raise ValueError(f"Trainer cannot instantiate unsupported optimizer: {self.args.optim}")
optimizer_cls, optimizer_kwargs = Trainer.get_optimizercls_and_params(self.args)

if self.sharded_ddp == ShardedDDPOption.SIMPLE:
self.optimizer = OSS(
@@ -869,6 +836,48 @@ def create_optimizer(self):

return self.optimizer

@staticmethod
def get_optimizercls_and_params(args: TrainingArguments) -> Tuple[Any, Any]:
"""
Returns the optimizer class and optimizer parameters based on the training arguments.

Args:
args (`transformers.training_args.TrainingArguments`):
The training arguments for the training session.

"""
optimizer_kwargs = {"lr": args.learning_rate}
adam_kwargs = {
"betas": (args.adam_beta1, args.adam_beta2),
"eps": args.adam_epsilon,
}
# TODO the following code is a good candidate for PEP 622 once Python 3.10 becomes the
# minimum required version. See, https://www.python.org/dev/peps/pep-0622/
if args.optim == OptimizerNames.ADAFACTOR.value:
optimizer_cls = Adafactor
optimizer_kwargs.update({"scale_parameter": False, "relative_step": False})
elif args.optim == OptimizerNames.ADAMW_HF.value:
from .optimization import AdamW

optimizer_cls = AdamW
optimizer_kwargs.update(adam_kwargs)
elif args.optim == OptimizerNames.ADAMW_TORCH.value:
from torch.optim import AdamW

optimizer_cls = AdamW
optimizer_kwargs.update(adam_kwargs)
elif args.optim == OptimizerNames.APEX_FUSED_ADAM.value:
try:
from apex.optimizers import FusedAdam

optimizer_cls = FusedAdam
optimizer_kwargs.update(adam_kwargs)
except ImportError:
raise ValueError("Trainer tried to instantiate apex FusedAdam but apex is not installed!")
else:
raise ValueError(f"Trainer cannot instantiate unsupported optimizer: {args.optim}")
return optimizer_cls, optimizer_kwargs

def create_scheduler(self, num_training_steps: int, optimizer: torch.optim.Optimizer = None):
"""
Setup the scheduler. The optimizer of the trainer must have been set up either before this method is called or
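As a quick illustration of the refactor above, the new static helper can be called on its own to build an optimizer outside of Trainer.create_optimizer(). A minimal sketch, assuming this branch's API (later commits in this PR rename several methods, so the final name may differ) and any torch module standing in for the model:

import torch

from transformers import Trainer, TrainingArguments
from transformers.training_args import OptimizerNames

model = torch.nn.Linear(4, 2)  # any torch module works for illustration
args = TrainingArguments(output_dir="out", optim=OptimizerNames.ADAMW_TORCH.value, learning_rate=1e-3)

# The helper maps training arguments to an optimizer class plus its kwargs
# (lr, betas, eps, ...); instantiation is left to the caller.
optimizer_cls, optimizer_kwargs = Trainer.get_optimizercls_and_params(args)
optimizer = optimizer_cls(model.parameters(), **optimizer_kwargs)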
84 changes: 84 additions & 0 deletions tests/test_trainer.py
@@ -23,6 +23,7 @@
import tempfile
import unittest
from pathlib import Path
from unittest.mock import Mock, patch

import numpy as np

@@ -61,6 +62,7 @@
slow,
)
from transformers.trainer_utils import PREFIX_CHECKPOINT_DIR
from transformers.training_args import OptimizerNames
from transformers.utils.hp_naming import TrialShortNamer


@@ -1690,3 +1692,85 @@ def hp_name(trial):
trainer.hyperparameter_search(
direction="minimize", hp_space=hp_space, hp_name=hp_name, backend="sigopt", n_trials=4
)


@require_torch
class TrainerOptimizerChoiceTest(unittest.TestCase):
def test_invalid_optimizer(self):
args = TrainingArguments(optim="bla", output_dir="None")
with self.assertRaises(ValueError):
Trainer.get_optimizercls_and_params(args)

def check_optim(self, args, mandatory_params, expected_cls):
"""
Checks that the common case for an optimizer works.
"""
actual_cls, optim_params = Trainer.get_optimizercls_and_params(args)
self.assertEqual(expected_cls, actual_cls)
self.assertIsNotNone(optim_params)

for p, v in mandatory_params.items():
self.assertTrue(p in optim_params)
actual_v = optim_params[p]
self.assertTrue(actual_v == v, f"Failed check for {p}. Expected {v}, but got {actual_v}.")

def test_adafactor(self):
from transformers.optimization import Adafactor

args = TrainingArguments(optim=OptimizerNames.ADAFACTOR.value, output_dir="None")

mandatory_params = {"scale_parameter": False, "relative_step": False}

self.check_optim(args, mandatory_params, Adafactor)

def test_adam_hf(self):
from transformers.optimization import AdamW

args = TrainingArguments(optim=OptimizerNames.ADAMW_HF.value, output_dir="None", learning_rate=0.3)

mandatory_params = {
"betas": (args.adam_beta1, args.adam_beta2),
"eps": args.adam_epsilon,
"lr": args.learning_rate,
}

self.check_optim(args, mandatory_params, AdamW)

def test_adam_torch(self):
Contributor review comment:

This is almost identical to the previous test - use parameterized and have 2 tests with a single body?

This could include the real apex test as well, except it'd need to skip if is_apex_available is False (see the parameterized sketch after the tests below).

from transformers.file_utils import is_apex_available
...
from torch.optim import AdamW

args = TrainingArguments(optim=OptimizerNames.ADAMW_TORCH.value, output_dir="None", learning_rate=0.3)

mandatory_params = {
"betas": (args.adam_beta1, args.adam_beta2),
"eps": args.adam_epsilon,
"lr": args.learning_rate,
}

self.check_optim(args, mandatory_params, AdamW)

def test_fused_adam(self):
args = TrainingArguments(optim=OptimizerNames.APEX_FUSED_ADAM.value, output_dir="None", learning_rate=0.3)

mandatory_params = {
"betas": (args.adam_beta1, args.adam_beta2),
"eps": args.adam_epsilon,
"lr": args.learning_rate,
}

mock = Mock()
modules = {
"apex": mock,
"apex.optimizers": mock.optimizers,
"apex.optimizers.FusedAdam": mock.optimizers.FusedAdam,
}
with patch.dict("sys.modules", modules):
self.check_optim(args, mandatory_params, mock.optimizers.FusedAdam)

def test_fused_adam_no_apex(self):
args = TrainingArguments(optim=OptimizerNames.APEX_FUSED_ADAM.value, output_dir="None")

# Pretend that apex does not exist, even if installed.
with patch.dict("sys.modules", {"apex": None}):
with self.assertRaises(ValueError):
Trainer.get_optimizercls_and_params(args)
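Following the review comment on test_adam_torch above, a single parameterized body could cover both AdamW variants, with a separate apex test that is skipped when apex is unavailable. This is only a sketch of the suggestion (the PR's later commit "rework to add apex to parameterized; add actual train test" implements its own version); it assumes the parameterized package and the helper name as of this commit:

import unittest

from parameterized import parameterized

from transformers import Trainer, TrainingArguments
from transformers.file_utils import is_apex_available
from transformers.training_args import OptimizerNames


class TrainerOptimizerChoiceParameterizedSketch(unittest.TestCase):
    def check_adam_kwargs(self, args, kwargs):
        # The AdamW-style optimizers share the same keyword arguments.
        self.assertEqual(kwargs["lr"], args.learning_rate)
        self.assertEqual(kwargs["betas"], (args.adam_beta1, args.adam_beta2))
        self.assertEqual(kwargs["eps"], args.adam_epsilon)

    @parameterized.expand([(OptimizerNames.ADAMW_HF.value,), (OptimizerNames.ADAMW_TORCH.value,)])
    def test_adamw_variants(self, optim_name):
        args = TrainingArguments(optim=optim_name, output_dir="None", learning_rate=0.3)
        _, kwargs = Trainer.get_optimizercls_and_params(args)
        self.check_adam_kwargs(args, kwargs)

    @unittest.skipUnless(is_apex_available(), "apex is not installed")
    def test_fused_adam_real_apex(self):
        args = TrainingArguments(optim=OptimizerNames.APEX_FUSED_ADAM.value, output_dir="None", learning_rate=0.3)
        optimizer_cls, kwargs = Trainer.get_optimizercls_and_params(args)
        self.assertEqual(optimizer_cls.__name__, "FusedAdam")
        self.check_adam_kwargs(args, kwargs)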