Skip to content

Commit

Permalink
Use only legacy optimizers in multi-optimizers
Browse files Browse the repository at this point in the history
  • Loading branch information
edknv committed Mar 28, 2023
1 parent 8be9b96 commit 7f8ca15
Show file tree
Hide file tree
Showing 5 changed files with 80 additions and 24 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/tensorflow.yml
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ jobs:
matrix:
python-version: [ 3.8 ]
os: [ ubuntu-latest ]
tensorflow-version: ["~=2.8.0", "~=2.9.0", "~=2.10.0"]
tensorflow-version: ["~=2.8.0", "~=2.9.0", "~=2.10.0", "~=2.11.0"]

steps:
- uses: actions/checkout@v3
Expand Down
32 changes: 26 additions & 6 deletions merlin/models/tf/blocks/optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,13 @@
#

import collections
import importlib
import warnings
from dataclasses import dataclass
from typing import Callable, List, Optional, Sequence, Tuple, Union

import numpy as np
import tensorflow as tf
from packaging import version

import merlin.models.tf as ml
from merlin.models.tf.core.base import Block
Expand All @@ -32,10 +32,10 @@
FloatTensorLike = Union[tf.Tensor, float, np.float16, np.float32, np.float64]


if importlib.util.find_spec("tensorflow.keras.optimizers.legacy") is not None:
keras_optimizers = tf.keras.optimizers.legacy
else:
if version.parse(tf.__version__) < version.parse("2.11.0"):
keras_optimizers = tf.keras.optimizers
else:
keras_optimizers = tf.keras.optimizers.legacy


@dataclass
Expand All @@ -51,8 +51,13 @@ class OptimizerBlocks:

def get_config(self):
"""return a tuple of serialized keras objects"""
optimizer_config = tf.keras.utils.serialize_keras_object(self.optimizer)
if version.parse(tf.__version__) >= version.parse("2.11.0") and isinstance(
self.optimizer, tf.keras.optimizers.legacy.Optimizer
):
optimizer_config["use_legacy_optimizer"] = True
return (
tf.keras.utils.serialize_keras_object(self.optimizer),
optimizer_config,
[tf.keras.utils.serialize_keras_object(block) for block in self.blocks],
)

Expand Down Expand Up @@ -130,7 +135,22 @@ def __init__(
self.default_optimizer = tf.keras.optimizers.get(default_optimizer)
self.optimizers_and_blocks = []
for i, pair in enumerate(optimizers_and_blocks):
pair.optimizer = tf.keras.optimizers.get(pair.optimizer)
if version.parse(tf.__version__) < version.parse("2.11.0"):
pair.optimizer = tf.keras.optimizers.get(pair.optimizer)
else:
if not (
isinstance(pair.optimizer, str)
or isinstance(pair.optimizer, tf.keras.optimizers.legacy.Optimizer)
):
raise ValueError(
"Optimizers must be a str or an instance of "
"tf.keras.optimizers.legacy.Optimizer with Tensorflow >= 2.11."
)
pair.optimizer = tf.keras.optimizers.get(
pair.optimizer,
use_legacy_optimizer=True,
)

self._track_trackable(pair.optimizer, name=f"Optimizer{i}")
pair.blocks = [pair.blocks] if isinstance(pair.blocks, Block) else pair.blocks
self.optimizers_and_blocks.append(pair)
Expand Down
56 changes: 42 additions & 14 deletions tests/unit/tf/blocks/test_optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,11 @@
from merlin.models.utils.schema_utils import create_categorical_column
from merlin.schema import Schema, Tags

if version.parse(tf.__version__) < version.parse("2.11.0"):
keras_optimizers = tf.keras.optimizers
else:
keras_optimizers = tf.keras.optimizers.legacy


def generate_two_layers():
initializer_first_layer = tf.constant_initializer(np.ones((3, 4)))
Expand All @@ -44,14 +49,37 @@ def generate_two_layers():
return first_layer, second_layer


@pytest.mark.skipif
@pytest.mark.parametrize(
"optimizers",
[
("sgd", keras_optimizers.Adagrad()),
(keras_optimizers.SGD(), "adam"),
],
)
def test_new_optimizers_raise_error(optimizers):
layers = generate_two_layers()
with pytest.raises(ValueError) as excinfo:
ml.MultiOptimizer(
default_optimizer=optimizers[0],
optimizers_and_blocks=[
ml.OptimizerBlocks(optimizers[0], layers[0]),
ml.OptimizerBlocks(optimizers[1], layers[1]),
],
)
assert "Optimizers must be an instance of tf.keras.optimizers.legacy.Optimizer" in str(
excinfo.value
)


@pytest.mark.parametrize(
"optimizers",
[
("sgd", "adam"),
("rmsprop", "sgd"),
("adam", "adagrad"),
("adagrad", "rmsprop"),
(tf.keras.optimizers.legacy.SGD(), tf.keras.optimizers.legacy.Adagrad()),
(keras_optimizers.SGD(), keras_optimizers.Adagrad()),
],
)
def test_optimizers(optimizers):
Expand Down Expand Up @@ -120,8 +148,8 @@ def test_model_with_multi_optimizers(ecommerce_data, run_eagerly):
multi_optimizers = ml.MultiOptimizer(
default_optimizer="adam",
optimizers_and_blocks=[
ml.OptimizerBlocks(tf.keras.optimizers.SGD(), user_tower),
ml.OptimizerBlocks(tf.keras.optimizers.Adam(), item_tower),
ml.OptimizerBlocks(keras_optimizers.SGD(), user_tower),
ml.OptimizerBlocks(keras_optimizers.Adam(), item_tower),
],
)
testing_utils.model_test(
Expand All @@ -141,8 +169,8 @@ def test_multi_optimizer_list_input(ecommerce_data, run_eagerly):
model = ml.Model(two_tower, ml.BinaryClassificationTask("click"))
multi_optimizers = ml.MultiOptimizer(
optimizers_and_blocks=[
ml.OptimizerBlocks(tf.keras.optimizers.SGD(), user_tower),
ml.OptimizerBlocks(tf.keras.optimizers.Adam(), [item_tower, third_tower]),
ml.OptimizerBlocks(keras_optimizers.SGD(), user_tower),
ml.OptimizerBlocks(keras_optimizers.Adam(), [item_tower, third_tower]),
],
)
testing_utils.model_test(
Expand All @@ -163,8 +191,8 @@ def test_multi_optimizer_add(ecommerce_data, run_eagerly):
multi_optimizers = ml.MultiOptimizer(
default_optimizer="adam",
optimizers_and_blocks=[
ml.OptimizerBlocks(tf.keras.optimizers.SGD(), user_tower),
ml.OptimizerBlocks(tf.keras.optimizers.Adam(), item_tower),
ml.OptimizerBlocks(keras_optimizers.SGD(), user_tower),
ml.OptimizerBlocks(keras_optimizers.Adam(), item_tower),
],
)
multi_optimizers.add(ml.OptimizerBlocks("adagrad", third_tower))
Expand All @@ -180,7 +208,7 @@ def test_multi_optimizer_add(ecommerce_data, run_eagerly):
("rmsprop", "sgd"),
("adam", "adagrad"),
("adagrad", "rmsprop"),
(tf.keras.optimizers.SGD(), tf.keras.optimizers.Adagrad()),
(keras_optimizers.SGD(), keras_optimizers.Adagrad()),
],
)
def test_multi_optimizers_from_config(ecommerce_data, optimizers):
Expand Down Expand Up @@ -210,7 +238,7 @@ def test_multi_optimizers_from_config(ecommerce_data, optimizers):
"optimizers",
[
("sgd", "adam"),
(tf.keras.optimizers.SGD(), tf.keras.optimizers.Adagrad()),
(keras_optimizers.SGD(), keras_optimizers.Adagrad()),
],
)
def test_multi_optimizers_from_config_list_input(ecommerce_data, optimizers):
Expand Down Expand Up @@ -254,8 +282,8 @@ def test_examples_in_code_comments(ecommerce_data, use_default):
optimizer = ml.MultiOptimizer(
default_optimizer="adagrad",
optimizers_and_blocks=[
ml.OptimizerBlocks(tf.keras.optimizers.SGD(), user_tower),
ml.OptimizerBlocks(tf.keras.optimizers.Adam(), item_tower),
ml.OptimizerBlocks(keras_optimizers.SGD(), user_tower),
ml.OptimizerBlocks(keras_optimizers.Adam(), item_tower),
],
)
else:
Expand Down Expand Up @@ -283,8 +311,8 @@ def test_update_optimizer(ecommerce_data, run_eagerly):
user_tower_1 = ml.InputBlock(schema.select_by_tag(Tags.USER)).connect(ml.MLPBlock([256, 128]))
two_tower = ml.ParallelBlock({"user": user_tower_0, "item": user_tower_1}, aggregation="concat")
model = ml.Model(two_tower, ml.BinaryClassificationTask("click"))
sgd = tf.keras.optimizers.SGD()
adam = tf.keras.optimizers.Adam()
sgd = keras_optimizers.SGD()
adam = keras_optimizers.Adam()
multi_optimizers = ml.MultiOptimizer(
optimizers_and_blocks=[
ml.OptimizerBlocks(adam, user_tower_0),
Expand Down Expand Up @@ -560,7 +588,7 @@ def test_lazy_adam_in_model_with_multi_optimizers(ecommerce_data, run_eagerly):
multi_optimizers = ml.MultiOptimizer(
default_optimizer="adam",
optimizers_and_blocks=[
ml.OptimizerBlocks(tf.keras.optimizers.SGD(), user_tower),
ml.OptimizerBlocks(keras_optimizers.SGD(), user_tower),
ml.OptimizerBlocks(ml.LazyAdam(), item_tower),
],
)
Expand Down
12 changes: 10 additions & 2 deletions tests/unit/tf/horovod/test_horovod.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import importlib
import os

import pytest
Expand Down Expand Up @@ -69,13 +70,20 @@ def test_horovod_multigpu_dlrm(
prediction_tasks=mm.BinaryClassificationTask(target_column),
)

# TF 2.11+: the optimizer has to be an instance of tf.keras.optimizers.legacy.Optimizer
if importlib.util.find_spec("tensorflow.keras.optimizers.legacy") is not None:
keras_optimizers = tf.keras.optimizers.legacy
else:
keras_optimizers = tf.keras.optimizers

if custom_distributed_optimizer:
# Test for a case when the user uses a custom DistributedOptimizer.
# With a custom hvd.DistributedOptimizer, users have to adjust the learning rate.
opt = tf.keras.optimizers.Adagrad(learning_rate=learning_rate * hvd.size())
opt = keras_optimizers.Adagrad(learning_rate=learning_rate * hvd.size())
opt = hvd.DistributedOptimizer(opt, compression=hvd.Compression.fp16)
else:
opt = tf.keras.optimizers.Adagrad(learning_rate=learning_rate)
opt = keras_optimizers.Adagrad(learning_rate=learning_rate)

model.compile(optimizer=opt, run_eagerly=False, metrics=[tf.keras.metrics.AUC()])

Expand Down
2 changes: 1 addition & 1 deletion tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ setenv =
commands =
conda update --yes --name base --channel defaults conda
conda env create --prefix {envdir}/env --file requirements/horovod-cpu-environment.yml --force
{envdir}/env/bin/python -m pip install horovod --no-cache-dir
{envdir}/env/bin/python -m pip install 'horovod==0.27.0' --no-cache-dir
{envdir}/env/bin/horovodrun --check-build
{envdir}/env/bin/python -m pip install --upgrade git+https://github.com/NVIDIA-Merlin/core.git
{envdir}/env/bin/python -m pip install --upgrade git+https://github.com/NVIDIA-Merlin/dataloader.git
Expand Down

0 comments on commit 7f8ca15

Please sign in to comment.