Commit 8a96b50
Sync OSS keras to head.
PiperOrigin-RevId: 365861361
qlzh727 authored and tensorflower-gardener committed Mar 30, 2021
1 parent e1c2a08 commit 8a96b50
Showing 25 changed files with 124 additions and 99 deletions.
2 changes: 1 addition & 1 deletion keras/__init__.py
@@ -30,6 +30,6 @@

from tensorflow.python.util.tf_export import keras_export

-__version__ = '2.5.0'
+__version__ = '2.6.0'

keras_export('keras.__version__').export_constant(__name__, '__version__')
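As an aside (not part of the diff), the exported constant above is what user code reads back:

```python
# Minimal sketch: after this commit, the exported version constant reports
# the new release string through the public namespace.
import tensorflow as tf

print(tf.keras.__version__)  # '2.6.0'
```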
2 changes: 1 addition & 1 deletion keras/callbacks.py
@@ -1993,7 +1993,7 @@ def keras_model_summary(name, data, step=None):
json_string = data.to_json()
except Exception as exc: # pylint: disable=broad-except
    # An exception should not break the model code.
-    logging.warn('Model failed to serialize as JSON. Ignoring... %s', exc)
+    logging.warning('Model failed to serialize as JSON. Ignoring... %s', exc)
return False

with tf.summary.experimental.summary_scope(name, 'graph_keras_model',
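Most of this commit swaps `logging.warn` for `logging.warning`. For background (not part of the diff): in the Python standard library, which TF's logging shim mirrors, `warn` is a deprecated alias of `warning`. A minimal sketch:

```python
import logging
import warnings

# Escalate DeprecationWarning to an exception so the difference is visible.
warnings.simplefilter('error', DeprecationWarning)

logging.warning('preferred spelling')  # fine
try:
    logging.warn('deprecated alias')   # CPython warns that warn() is deprecated
except DeprecationWarning as exc:
    print('caught:', exc)
```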
4 changes: 2 additions & 2 deletions keras/callbacks_test.py
@@ -2808,7 +2808,7 @@ def call(self, inputs):
return self.activation(x)

model = SimpleSubclass()
-    with tf.compat.v1.test.mock.patch.object(logging, 'warn') as mock_log:
+    with tf.compat.v1.test.mock.patch.object(logging, 'warning') as mock_log:
self.assertFalse(
keras.callbacks.keras_model_summary(
name='my_name', data=model, step=1))
@@ -2820,7 +2820,7 @@ def testKerasModel_otherExceptions(self):
model = keras.Sequential()

with tf.compat.v1.test.mock.patch.object(model, 'to_json') as mock_to_json:
-      with tf.compat.v1.test.mock.patch.object(logging, 'warn') as mock_log:
+      with tf.compat.v1.test.mock.patch.object(logging, 'warning') as mock_log:
mock_to_json.side_effect = Exception('oops')
self.assertFalse(
keras.callbacks.keras_model_summary(
3 changes: 3 additions & 0 deletions keras/distribute/keras_image_model_correctness_test.py
@@ -99,6 +99,9 @@ def get_data_with_partial_last_batch_eval(self):
keras_correctness_test_base.all_strategy_and_input_config_combinations() +
keras_correctness_test_base.multi_worker_mirrored_eager())
def test_cnn_correctness(self, distribution, use_numpy, use_validation_data):
+    if (distribution ==
+        tf.__internal__.distribute.combinations.central_storage_strategy_with_gpu_and_cpu):
+      self.skipTest('b/183958183')
self.run_correctness_test(distribution, use_numpy, use_validation_data)

@tf.__internal__.distribute.combinations.generate(
3 changes: 2 additions & 1 deletion keras/distribute/multi_worker_test.py
@@ -249,7 +249,8 @@ class KPLMultiWorkerTest(tf.test.TestCase,
use_adapt=[False], # TODO(b/180742437): Add tests for using adapt.
strategy=[
tf.__internal__.distribute.combinations.multi_worker_mirrored_2x1_gpu,
-            tf.__internal__.distribute.combinations.multi_worker_mirrored_2x2_gpu,
+            # TODO(b/183956672): Re-enable
+            # strategy_combinations.multi_worker_mirrored_2x2_gpu,
]))
def testTrainAndServeWithKPL(self, use_adapt, strategy):
test_utils_obj = kpl_test_utils.DistributeKplTestUtils()
5 changes: 3 additions & 2 deletions keras/engine/base_layer.py
@@ -608,8 +608,9 @@ def getter(*args, **kwargs): # pylint: disable=function-redefined
# disable it if it is specified.
# TODO(b/142020079): Reenable it once the bug is fixed.
if caching_device is not None:
-        tf_logging.warn('`caching_device` does not work with mixed precision '
-                        'API. Ignoring user specified `caching_device`.')
+        tf_logging.warning(
+            '`caching_device` does not work with mixed precision API. Ignoring '
+            'user specified `caching_device`.')
caching_device = None

variable = self._add_variable_with_custom_getter(
5 changes: 3 additions & 2 deletions keras/engine/base_layer_v1.py
@@ -416,8 +416,9 @@ def getter(*args, **kwargs): # pylint: disable=function-redefined
# disable it if it is specified.
# TODO(b/142020079): Reenable it once the bug is fixed.
if caching_device is not None:
-        tf_logging.warn('`caching_device` does not work with mixed precision '
-                        'API. Ignoring user specified `caching_device`.')
+        tf_logging.warning(
+            '`caching_device` does not work with mixed precision API. Ignoring '
+            'user specified `caching_device`.')
caching_device = None

variable = self._add_variable_with_custom_getter(
2 changes: 1 addition & 1 deletion keras/engine/data_adapter.py
@@ -426,7 +426,7 @@ def _is_array_like(v):
return False

def __init__(self, *args, **kwargs):
-    logging.warn(
+    logging.warning(
"Keras is training/fitting/evaluating on array-like data. Keras may "
"not be optimized for this format, so if your input data format is "
"supported by TensorFlow I/O (https://github.com/tensorflow/io) we "
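A hedged sketch (hypothetical arrays, not from the diff) of the alternative this warning nudges users toward:

```python
import numpy as np
import tensorflow as tf

# Array-like (e.g. pandas) inputs take the adapter path that logs the warning
# above; wrapping them in tf.data avoids it and is generally better optimized.
x = np.random.rand(128, 10).astype('float32')
y = np.random.randint(0, 2, size=(128, 1)).astype('float32')
ds = tf.data.Dataset.from_tensor_slices((x, y)).batch(32)
# model.fit(ds)  # consumes the data through the tf.data code path instead
```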
8 changes: 5 additions & 3 deletions keras/engine/training.py
@@ -531,9 +531,9 @@ def compile(self,
base_layer.keras_api_gauge.get_cell('compile').set(True)
with self.distribute_strategy.scope():
if 'experimental_steps_per_execution' in kwargs:
-        logging.warn('The argument `steps_per_execution` is no longer '
-                     'experimental. Pass `steps_per_execution` instead of '
-                     '`experimental_steps_per_execution`.')
+        logging.warning('The argument `steps_per_execution` is no longer '
+                        'experimental. Pass `steps_per_execution` instead of '
+                        '`experimental_steps_per_execution`.')
if not steps_per_execution:
steps_per_execution = kwargs.pop('experimental_steps_per_execution')
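A sketch of the rename this warning covers (hypothetical toy model; `steps_per_execution` has been the supported spelling since TF 2.4):

```python
import tensorflow as tf

model = tf.keras.Sequential([tf.keras.layers.Dense(1)])

# Supported spelling:
model.compile(optimizer='adam', loss='mse', steps_per_execution=10)

# Old spelling, now routed through the warning above before being popped:
# model.compile(optimizer='adam', loss='mse',
#               experimental_steps_per_execution=10)
```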

@@ -741,6 +741,8 @@ def train_step(self, data):
"""The logic for one training step.
This method can be overridden to support custom training logic.
+    For concrete examples of how to override this method see
+    [Customizing what happens in fit](https://www.tensorflow.org/guide/keras/customizing_what_happens_in_fit).
This method is called by `Model.make_train_function`.
This method should contain the mathematical logic for one step of training.
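A minimal sketch of the override pattern the newly linked guide describes (the standard recipe, simplified; not part of the diff):

```python
import tensorflow as tf

class CustomModel(tf.keras.Model):

  def train_step(self, data):
    x, y = data
    with tf.GradientTape() as tape:
      y_pred = self(x, training=True)          # forward pass
      loss = self.compiled_loss(y, y_pred)     # loss configured in compile()
    grads = tape.gradient(loss, self.trainable_variables)
    self.optimizer.apply_gradients(zip(grads, self.trainable_variables))
    self.compiled_metrics.update_state(y, y_pred)
    return {m.name: m.result() for m in self.metrics}
```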
4 changes: 2 additions & 2 deletions keras/layers/core.py
@@ -945,7 +945,7 @@ def _check_variables(self, created_variables, accessed_variables):
def _warn(self, msg):
# This method will be overridden in a unit test to raise an error, because
# self.assertWarns is not universally implemented.
-    return tf_logging.warn(msg)
+    return tf_logging.warning(msg)

def compute_mask(self, inputs, mask=None):
if callable(self.mask):
@@ -1421,7 +1421,7 @@ def _check_variables(self, created_variables, accessed_variables):
def _warn(self, msg):
# This method will be overridden in a unit test to raise an error, because
# self.assertWarns is not universally implemented.
-    return tf_logging.warn(msg)
+    return tf_logging.warning(msg)

def get_config(self):
if not self.symbol:
57 changes: 32 additions & 25 deletions keras/layers/embeddings.py
@@ -15,6 +15,8 @@
"""Embedding layer."""

import tensorflow.compat.v2 as tf
+# pylint: disable=g-classes-have-attributes
+
from keras import backend
from keras import constraints
from keras import initializers
@@ -77,6 +79,28 @@ class Embedding(Layer):
Output shape:
3D tensor with shape: `(batch_size, input_length, output_dim)`.
+  **Note on variable placement:**
+  By default, if a GPU is available, the embedding matrix will be placed on
+  the GPU. This achieves the best performance, but it might cause issues:
+
+  - You may be using an optimizer that does not support sparse GPU kernels.
+    In this case you will see an error upon training your model.
+  - Your embedding matrix may be too large to fit on your GPU. In this case
+    you will see an Out Of Memory (OOM) error.
+
+  In such cases, you should place the embedding matrix on the CPU memory.
+  You can do so with a device scope, as follows:
+
+  ```python
+  with tf.device('cpu:0'):
+    embedding_layer = Embedding(...)
+    embedding_layer.build()
+  ```
+
+  The pre-built `embedding_layer` instance can then be added to a `Sequential`
+  model (e.g. `model.add(embedding_layer)`), called in a Functional model
+  (e.g. `x = embedding_layer(x)`), or used in a subclassed model.
"""

def __init__(self,
@@ -121,36 +145,19 @@ def __init__(self,
self.input_length = input_length

@tf_utils.shape_type_conversion
-  def build(self, input_shape):
-    # Note: most sparse optimizers do not have GPU kernels defined. When
-    # building graphs, the placement algorithm is able to place variables on CPU
-    # since it knows all kernels using the variable only exist on CPU.
-    # When eager execution is enabled, the placement decision has to be made
-    # right now. Checking for the presence of GPUs to avoid complicating the
-    # TPU codepaths which can handle sparse optimizers.
-    if tf.executing_eagerly() and tf.config.list_logical_devices('GPU'):
-      with tf.compat.v1.device('cpu:0'):
-        self.embeddings = self.add_weight(
-            shape=(self.input_dim, self.output_dim),
-            initializer=self.embeddings_initializer,
-            name='embeddings',
-            regularizer=self.embeddings_regularizer,
-            constraint=self.embeddings_constraint,
-            experimental_autocast=False)
-    else:
-      self.embeddings = self.add_weight(
-          shape=(self.input_dim, self.output_dim),
-          initializer=self.embeddings_initializer,
-          name='embeddings',
-          regularizer=self.embeddings_regularizer,
-          constraint=self.embeddings_constraint,
-          experimental_autocast=False)
+  def build(self, input_shape=None):
+    self.embeddings = self.add_weight(
+        shape=(self.input_dim, self.output_dim),
+        initializer=self.embeddings_initializer,
+        name='embeddings',
+        regularizer=self.embeddings_regularizer,
+        constraint=self.embeddings_constraint,
+        experimental_autocast=False)
self.built = True

def compute_mask(self, inputs, mask=None):
if not self.mask_zero:
return None

return tf.not_equal(inputs, 0)

@tf_utils.shape_type_conversion
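A hedged illustration (hypothetical values, not from the diff) of the `compute_mask` behavior above when `mask_zero=True`:

```python
import tensorflow as tf

layer = tf.keras.layers.Embedding(input_dim=10, output_dim=4, mask_zero=True)
inputs = tf.constant([[1, 2, 0, 0]])  # index 0 acts as padding
print(layer.compute_mask(inputs).numpy())  # [[ True  True False False]]
```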
14 changes: 7 additions & 7 deletions keras/layers/legacy_rnn/rnn_cell_impl.py
@@ -416,7 +416,7 @@ def __init__(self,
_reuse=reuse, name=name, dtype=dtype, **kwargs)
_check_supported_dtypes(self.dtype)
if tf.executing_eagerly() and tf.config.list_logical_devices("GPU"):
-      logging.warn(
+      logging.warning(
"%s: Note that this cell is not optimized for performance. "
"Please use tf.contrib.cudnn_rnn.CudnnRNNTanh for better "
"performance on GPU.", self)
@@ -525,7 +525,7 @@ def __init__(self,
_check_supported_dtypes(self.dtype)

if tf.executing_eagerly() and tf.config.list_logical_devices("GPU"):
-      logging.warn(
+      logging.warning(
"%s: Note that this cell is not optimized for performance. "
"Please use tf.contrib.cudnn_rnn.CudnnGRU for better "
"performance on GPU.", self)
@@ -697,11 +697,11 @@ def __init__(self,
_reuse=reuse, name=name, dtype=dtype, **kwargs)
_check_supported_dtypes(self.dtype)
if not state_is_tuple:
-      logging.warn(
+      logging.warning(
"%s: Using a concatenated state is slower and will soon be "
"deprecated. Use state_is_tuple=True.", self)
if tf.executing_eagerly() and tf.config.list_logical_devices("GPU"):
-      logging.warn(
+      logging.warning(
"%s: Note that this cell is not optimized for performance. "
"Please use tf.contrib.cudnn_rnn.CudnnLSTM for better "
"performance on GPU.", self)
@@ -898,16 +898,16 @@ def __init__(self,
_reuse=reuse, name=name, dtype=dtype, **kwargs)
_check_supported_dtypes(self.dtype)
if not state_is_tuple:
-      logging.warn(
+      logging.warning(
"%s: Using a concatenated state is slower and will soon be "
"deprecated. Use state_is_tuple=True.", self)
if num_unit_shards is not None or num_proj_shards is not None:
-      logging.warn(
+      logging.warning(
"%s: The num_unit_shards and proj_unit_shards parameters are "
"deprecated and will be removed in Jan 2017. "
"Use a variable scope with a partitioner instead.", self)
if tf.executing_eagerly() and tf.config.list_logical_devices("GPU"):
-      logging.warn(
+      logging.warning(
"%s: Note that this cell is not optimized for performance. "
"Please use tf.contrib.cudnn_rnn.CudnnLSTM for better "
"performance on GPU.", self)
7 changes: 6 additions & 1 deletion keras/layers/preprocessing/BUILD
@@ -327,7 +327,10 @@ distribute_py_test(
"multi_and_single_gpu",
"nomultivm", # TODO(b/170502145)
],
-    tpu_tags = ["no_oss"],
+    tpu_tags = [
+        "no_oss",
+        "noguitar",  # TODO(b/183957207)
+    ],
deps = [
":image_preprocessing",
":preprocessing_test_utils",
@@ -410,6 +413,7 @@ tf_py_test(
size = "medium",
srcs = ["index_lookup_test.py"],
python_version = "PY3",
+    tags = ["noasan"],  # TODO(b/183961255)
deps = [
":index_lookup",
":preprocessing_test_utils",
@@ -480,6 +484,7 @@ tf_py_test(
size = "medium",
srcs = ["integer_lookup_test.py"],
python_version = "PY3",
+    tags = ["noasan"],  # TODO(b/183961255)
deps = [
":integer_lookup",
":preprocessing_test_utils",
3 changes: 1 addition & 2 deletions keras/layers/preprocessing/index_lookup.py
@@ -20,7 +20,6 @@
import collections
import json
import operator
-import os

import numpy as np
from keras import backend
@@ -260,7 +259,7 @@ def __init__(self,
# the actual data.
initializer = _NullInitializer(self._key_dtype, self._value_dtype)
else:
-      if not os.path.exists(vocabulary):
+      if not tf.compat.v1.gfile.Exists(vocabulary):
raise ValueError("Vocabulary file %s does not exist." % (vocabulary,))
self._static_vocabulary_path = vocabulary
num_tokens = table_utils.num_tokens_in_file(vocabulary)
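The switch from `os.path.exists` to `tf.compat.v1.gfile.Exists` lets the vocabulary check resolve remote filesystems through TF's filesystem layer. A sketch (hypothetical path):

```python
import tensorflow as tf

vocab_path = 'gs://some-bucket/vocab.txt'  # hypothetical remote file
# os.path.exists() is always False for gs:// or hdfs:// schemes;
# gfile (tf.io.gfile.exists in TF2 style) understands them.
print(tf.compat.v1.gfile.Exists(vocab_path))
```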
27 changes: 14 additions & 13 deletions keras/layers/recurrent.py
@@ -3035,22 +3035,23 @@ def _caching_device(rnn_cell):
# prevents forward computations in loop iterations from re-reading the
# updated weights.
if control_flow_util.IsInWhileLoop(tf.compat.v1.get_default_graph()):
-    logging.warn('Variable read device caching has been disabled because the '
-                 'RNN is in tf.while_loop loop context, which will cause '
-                 'reading stalled value in forward path. This could slow down '
-                 'the training due to duplicated variable reads. Please '
-                 'consider updating your code to remove tf.while_loop if '
-                 'possible.')
+    logging.warning(
+        'Variable read device caching has been disabled because the '
+        'RNN is in tf.while_loop loop context, which will cause '
+        'reading stalled value in forward path. This could slow down '
+        'the training due to duplicated variable reads. Please '
+        'consider updating your code to remove tf.while_loop if possible.')
return None
if (rnn_cell._dtype_policy.compute_dtype !=
rnn_cell._dtype_policy.variable_dtype):
-    logging.warn('Variable read device caching has been disabled since it '
-                 'doesn\'t work with the mixed precision API. This is '
-                 'likely to cause a slowdown for RNN training due to '
-                 'duplicated read of variable for each timestep, which '
-                 'will be significant in a multi remote worker setting. '
-                 'Please consider disabling mixed precision API if '
-                 'the performance has been affected.')
+    logging.warning(
+        'Variable read device caching has been disabled since it '
+        'doesn\'t work with the mixed precision API. This is '
+        'likely to cause a slowdown for RNN training due to '
+        'duplicated read of variable for each timestep, which '
+        'will be significant in a multi remote worker setting. '
+        'Please consider disabling mixed precision API if '
+        'the performance has been affected.')
return None
# Cache the value on the device that access the variable.
return lambda op: op.device
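For background (not part of the diff), a sketch of how a callable like the `lambda op: op.device` returned above is consumed: `tf.compat.v1.get_variable` accepts a `caching_device` that maps each reading op to the device on which the variable's value should be cached.

```python
import tensorflow as tf

with tf.Graph().as_default():
  v = tf.compat.v1.get_variable(
      'kernel', shape=[4, 4],
      # Cache reads on whatever device the consuming op runs on, mirroring
      # the callable returned by _caching_device above.
      caching_device=lambda op: op.device)
```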
4 changes: 2 additions & 2 deletions keras/layers/recurrent_v2.py
@@ -396,7 +396,7 @@ def __init__(self,
if self._could_use_gpu_kernel:
logging.debug(_CUDNN_AVAILABLE_MSG % self.name)
else:
-      logging.warn(_CUDNN_NOT_AVAILABLE_MSG % self.name)
+      logging.warning(_CUDNN_NOT_AVAILABLE_MSG % self.name)

if _use_new_code():
self._defun_wrapper = _DefunWrapper(time_major, go_backwards, 'gru')
@@ -1123,7 +1123,7 @@ def __init__(self,
if self._could_use_gpu_kernel:
logging.debug(_CUDNN_AVAILABLE_MSG % self.name)
else:
-      logging.warn(_CUDNN_NOT_AVAILABLE_MSG % self.name)
+      logging.warning(_CUDNN_NOT_AVAILABLE_MSG % self.name)

if _use_new_code():
self._defun_wrapper = _DefunWrapper(time_major, go_backwards, 'lstm')
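As a reminder of when `_CUDNN_NOT_AVAILABLE_MSG` fires (a sketch of the documented requirements for the v2 LSTM/GRU layers, not from the diff): keeping the defaults below lets the layer select the fused cuDNN kernel on GPU; changing any of them falls back to the generic kernel and logs the warning.

```python
import tensorflow as tf

lstm = tf.keras.layers.LSTM(
    64,
    activation='tanh',               # cuDNN requires tanh
    recurrent_activation='sigmoid',  # and sigmoid recurrent activation
    recurrent_dropout=0.0,           # must be exactly 0
    unroll=False,
    use_bias=True)
```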
2 changes: 1 addition & 1 deletion keras/metrics_confusion_matrix_test.py
@@ -1481,7 +1481,7 @@ def test_extra_dims(self):
result = auc_obj(labels, logits)
self.assertEqual(self.evaluate(result), 0.5)
except ImportError as e:
-      tf_logging.warn('Cannot test special functions: %s' % str(e))
+      tf_logging.warning('Cannot test special functions: %s' % str(e))


@combinations.generate(combinations.combine(mode=['graph', 'eager']))
15 changes: 8 additions & 7 deletions keras/mixed_precision/device_compatibility_check.py
@@ -104,14 +104,15 @@ def _log_device_compatibility_check(policy_name, gpu_details_list):
warning_str += ('See https://developer.nvidia.com/cuda-gpus for a list of '
'GPUs and their compute capabilities.\n')
warning_str += _COMPAT_CHECK_WARNING_SUFFIX
-    tf_logging.warn(warning_str)
+    tf_logging.warning(warning_str)
elif not supported_device_strs:
-    tf_logging.warn('%s\n'
-                    'The dtype policy mixed_float16 may run slowly because '
-                    'this machine does not have a GPU. Only Nvidia GPUs with '
-                    'compute capability of at least 7.0 run quickly with '
-                    'mixed_float16.\n%s' % (_COMPAT_CHECK_WARNING_PREFIX,
-                                            _COMPAT_CHECK_WARNING_SUFFIX))
+    tf_logging.warning(
+        '%s\n'
+        'The dtype policy mixed_float16 may run slowly because '
+        'this machine does not have a GPU. Only Nvidia GPUs with '
+        'compute capability of at least 7.0 run quickly with '
+        'mixed_float16.\n%s' % (_COMPAT_CHECK_WARNING_PREFIX,
+                                _COMPAT_CHECK_WARNING_SUFFIX))
elif len(supported_device_strs) == 1:
tf_logging.info('%s\n'
'Your GPU will likely run quickly with dtype policy '
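For context (TF 2.4+ public API; a sketch, not from the diff), the call that reaches this compatibility check:

```python
import tensorflow as tf

# Setting the global policy triggers _log_device_compatibility_check, which
# warns unless a GPU with compute capability >= 7.0 is available.
tf.keras.mixed_precision.set_global_policy('mixed_float16')
print(tf.keras.mixed_precision.global_policy())
```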