From f78555426e3ec6279a25f14989bf1454ba319903 Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Fri, 8 Nov 2019 17:17:57 -0800 Subject: [PATCH 001/130] Re-add the supported CUDA compute capabilities into the toolchain created for manylinux2010 compatibility for the 10.1 toolchain. PiperOrigin-RevId: 279422340 Change-Id: Icaff93aed22c3a319efb6dee4c3f703eb17c1a11 --- .../clang/bin/crosstool_wrapper_driver_is_not_gcc | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1/clang/bin/crosstool_wrapper_driver_is_not_gcc b/third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1/clang/bin/crosstool_wrapper_driver_is_not_gcc index af878f037734e0..bbfdde02409668 100755 --- a/third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1/clang/bin/crosstool_wrapper_driver_is_not_gcc +++ b/third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1/clang/bin/crosstool_wrapper_driver_is_not_gcc @@ -53,6 +53,11 @@ NVCC_PATH = '/usr/local/cuda-10.1/bin/nvcc' PREFIX_DIR = os.path.dirname(GCC_HOST_COMPILER_PATH) NVCC_VERSION = '10.1' +# Environment variable for supported TF CUDA Compute Capabilities +# eg. export TF_CUDA_COMPUTE_CAPABILITIES=3.5,3.7,5.2,6.0,6.1,7.0 +CUDA_COMPUTE_ENV_VAR = 'TF_CUDA_COMPUTE_CAPABILITIES' +DEFAULT_CUDA_COMPUTE_CAPABILITIES = '3.5,6.0' + def Log(s): print('gpus/crosstool: {0}'.format(s)) From 236128a1325b602c9df080157f5ed44edfb83533 Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Fri, 8 Nov 2019 23:46:34 -0800 Subject: [PATCH 002/130] Add a missing line for compute in toolchain. 
PiperOrigin-RevId: 279460905 Change-Id: Iaf895d72256b826aa2d3e474033800ececc7f658 --- .../clang/bin/crosstool_wrapper_driver_is_not_gcc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1/clang/bin/crosstool_wrapper_driver_is_not_gcc b/third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1/clang/bin/crosstool_wrapper_driver_is_not_gcc index bbfdde02409668..44e745f69e02bd 100755 --- a/third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1/clang/bin/crosstool_wrapper_driver_is_not_gcc +++ b/third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1/clang/bin/crosstool_wrapper_driver_is_not_gcc @@ -207,7 +207,7 @@ def InvokeNvcc(argv, log=False): srcs = ' '.join(src_files) out = ' -o ' + out_file[0] - supported_cuda_compute_capabilities = [ "3.0", "6.0" ] + supported_cuda_compute_capabilities = os.environ.get(CUDA_COMPUTE_ENV_VAR, DEFAULT_CUDA_COMPUTE_CAPABILITIES).split(',') nvccopts = '-D_FORCE_INLINES ' for capability in supported_cuda_compute_capabilities: capability = capability.replace('.', '') From d29ff1082744b631884e4159c865cfbcd77af047 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Sat, 9 Nov 2019 01:27:17 -0800 Subject: [PATCH 003/130] Export Keras preprocessing layers in tf.keras.layers.experimental. 
PiperOrigin-RevId: 279470036 Change-Id: Ib2b363d21baa14ac49088dd700f769c933164340 --- .../keras/engine/base_preprocessing_layer.py | 2 + tensorflow/python/keras/layers/__init__.py | 7 + .../layers/preprocessing/normalization.py | 2 + .../layers/preprocessing/normalization_v1.py | 2 + .../preprocessing/text_vectorization.py | 3 + .../preprocessing/text_vectorization_v1.py | 2 + .../tools/api/generator/api_init_files.bzl | 1 + .../tools/api/generator/api_init_files_v1.bzl | 1 + ...tensorflow.keras.layers.experimental.pbtxt | 7 + ...cessing.-normalization.__metaclass__.pbtxt | 14 ++ ...imental.preprocessing.-normalization.pbtxt | 225 +++++++++++++++++ ...g.-preprocessing-layer.__metaclass__.pbtxt | 14 ++ ...l.preprocessing.-preprocessing-layer.pbtxt | 221 +++++++++++++++++ ...ng.-text-vectorization.__metaclass__.pbtxt | 14 ++ ...al.preprocessing.-text-vectorization.pbtxt | 233 ++++++++++++++++++ ...as.layers.experimental.preprocessing.pbtxt | 15 ++ .../golden/v1/tensorflow.keras.layers.pbtxt | 4 + ...tensorflow.keras.layers.experimental.pbtxt | 7 + ...cessing.-normalization.__metaclass__.pbtxt | 14 ++ ...imental.preprocessing.-normalization.pbtxt | 223 +++++++++++++++++ ...g.-preprocessing-layer.__metaclass__.pbtxt | 14 ++ ...l.preprocessing.-preprocessing-layer.pbtxt | 221 +++++++++++++++++ ...ng.-text-vectorization.__metaclass__.pbtxt | 14 ++ ...al.preprocessing.-text-vectorization.pbtxt | 231 +++++++++++++++++ ...as.layers.experimental.preprocessing.pbtxt | 15 ++ .../golden/v2/tensorflow.keras.layers.pbtxt | 4 + 26 files changed, 1510 insertions(+) create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.pbtxt create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-normalization.__metaclass__.pbtxt create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-normalization.pbtxt create mode 100644 
tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-preprocessing-layer.__metaclass__.pbtxt create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-preprocessing-layer.pbtxt create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-text-vectorization.__metaclass__.pbtxt create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-text-vectorization.pbtxt create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.pbtxt create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.pbtxt create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-normalization.__metaclass__.pbtxt create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-normalization.pbtxt create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-preprocessing-layer.__metaclass__.pbtxt create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-preprocessing-layer.pbtxt create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-text-vectorization.__metaclass__.pbtxt create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-text-vectorization.pbtxt create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.pbtxt diff --git a/tensorflow/python/keras/engine/base_preprocessing_layer.py b/tensorflow/python/keras/engine/base_preprocessing_layer.py index 2f4bc8060aca59..29df47446733d2 100644 --- a/tensorflow/python/keras/engine/base_preprocessing_layer.py +++ b/tensorflow/python/keras/engine/base_preprocessing_layer.py @@ -28,8 +28,10 @@ from tensorflow.python.keras.engine import 
training_generator from tensorflow.python.keras.engine.base_layer import Layer from tensorflow.python.ops import math_ops +from tensorflow.python.util.tf_export import keras_export +@keras_export('keras.layers.experimental.preprocessing.PreprocessingLayer') class PreprocessingLayer(Layer): """Base class for PreprocessingLayers.""" __metaclass__ = abc.ABCMeta diff --git a/tensorflow/python/keras/layers/__init__.py b/tensorflow/python/keras/layers/__init__.py index 7655e5f6e0ea0a..87dfa34f932563 100644 --- a/tensorflow/python/keras/layers/__init__.py +++ b/tensorflow/python/keras/layers/__init__.py @@ -24,6 +24,13 @@ from tensorflow.python.keras.engine.input_layer import InputLayer from tensorflow.python.keras.engine.input_spec import InputSpec from tensorflow.python.keras.engine.base_layer import Layer +from tensorflow.python.keras.engine.base_preprocessing_layer import PreprocessingLayer + +# Preprocessing layers. +from tensorflow.python.keras.layers.preprocessing.normalization import Normalization +from tensorflow.python.keras.layers.preprocessing.normalization_v1 import Normalization as NormalizationV1 +from tensorflow.python.keras.layers.preprocessing.text_vectorization import TextVectorization +from tensorflow.python.keras.layers.preprocessing.text_vectorization_v1 import TextVectorization as TextVectorizationV1 # Advanced activations. 
from tensorflow.python.keras.layers.advanced_activations import LeakyReLU diff --git a/tensorflow/python/keras/layers/preprocessing/normalization.py b/tensorflow/python/keras/layers/preprocessing/normalization.py index c39f3f3ca19cbf..9f8c9d0a9038fe 100644 --- a/tensorflow/python/keras/layers/preprocessing/normalization.py +++ b/tensorflow/python/keras/layers/preprocessing/normalization.py @@ -30,6 +30,7 @@ from tensorflow.python.ops import init_ops from tensorflow.python.ops import math_ops from tensorflow.python.util import compat +from tensorflow.python.util.tf_export import keras_export _COUNT_NAME = 'count' _MEAN_NAME = 'mean' @@ -37,6 +38,7 @@ # TODO(momernick): Find a good example of normalization? +@keras_export('keras.layers.experimental.preprocessing.Normalization', v1=[]) class Normalization(CombinerPreprocessingLayer): """Feature-wise normalization of the data. diff --git a/tensorflow/python/keras/layers/preprocessing/normalization_v1.py b/tensorflow/python/keras/layers/preprocessing/normalization_v1.py index 5f265e507e4750..2cb4413cf7f131 100644 --- a/tensorflow/python/keras/layers/preprocessing/normalization_v1.py +++ b/tensorflow/python/keras/layers/preprocessing/normalization_v1.py @@ -20,7 +20,9 @@ from tensorflow.python.keras.engine.base_preprocessing_layer_v1 import CombinerPreprocessingLayer from tensorflow.python.keras.layers.preprocessing import normalization +from tensorflow.python.util.tf_export import keras_export +@keras_export(v1=['keras.layers.experimental.preprocessing.Normalization']) class Normalization(normalization.Normalization, CombinerPreprocessingLayer): pass diff --git a/tensorflow/python/keras/layers/preprocessing/text_vectorization.py b/tensorflow/python/keras/layers/preprocessing/text_vectorization.py index 035c73fba991b5..bbba593c351b50 100644 --- a/tensorflow/python/keras/layers/preprocessing/text_vectorization.py +++ b/tensorflow/python/keras/layers/preprocessing/text_vectorization.py @@ -42,6 +42,7 @@ from 
tensorflow.python.ops.ragged import ragged_string_ops from tensorflow.python.ops.ragged import ragged_tensor from tensorflow.python.util import compat +from tensorflow.python.util.tf_export import keras_export LOWER_AND_STRIP_PUNCTUATION = "lower_and_strip_punctuation" @@ -74,6 +75,8 @@ _ACCUMULATOR_NUM_DOCUMENTS = "num_documents" +@keras_export( + "keras.layers.experimental.preprocessing.TextVectorization", v1=[]) class TextVectorization(CombinerPreprocessingLayer): """Text vectorization layer. diff --git a/tensorflow/python/keras/layers/preprocessing/text_vectorization_v1.py b/tensorflow/python/keras/layers/preprocessing/text_vectorization_v1.py index 43b4c7c0132bd5..7b71cfb7b31cfe 100644 --- a/tensorflow/python/keras/layers/preprocessing/text_vectorization_v1.py +++ b/tensorflow/python/keras/layers/preprocessing/text_vectorization_v1.py @@ -24,8 +24,10 @@ from tensorflow.python.keras.engine import base_preprocessing_layer_v1 from tensorflow.python.keras.layers.preprocessing import text_vectorization from tensorflow.python.ops.ragged import ragged_tensor_value +from tensorflow.python.util.tf_export import keras_export +@keras_export(v1=['keras.layers.experimental.preprocessing.TextVectorization']) class TextVectorization(text_vectorization.TextVectorization, base_preprocessing_layer_v1.CombinerPreprocessingLayer): """Text vectorization layer. 
diff --git a/tensorflow/python/tools/api/generator/api_init_files.bzl b/tensorflow/python/tools/api/generator/api_init_files.bzl index 1ddf0bcb1fbc13..b2981b14209138 100644 --- a/tensorflow/python/tools/api/generator/api_init_files.bzl +++ b/tensorflow/python/tools/api/generator/api_init_files.bzl @@ -104,6 +104,7 @@ KERAS_API_INIT_FILES = [ "keras/initializers/__init__.py", "keras/layers/__init__.py", "keras/layers/experimental/__init__.py", + "keras/layers/experimental/preprocessing/__init__.py", "keras/losses/__init__.py", "keras/metrics/__init__.py", "keras/mixed_precision/__init__.py", diff --git a/tensorflow/python/tools/api/generator/api_init_files_v1.bzl b/tensorflow/python/tools/api/generator/api_init_files_v1.bzl index 62ecd8a284aa49..31e0c6ca457795 100644 --- a/tensorflow/python/tools/api/generator/api_init_files_v1.bzl +++ b/tensorflow/python/tools/api/generator/api_init_files_v1.bzl @@ -129,6 +129,7 @@ KERAS_API_INIT_FILES_V1 = [ "keras/initializers/__init__.py", "keras/layers/__init__.py", "keras/layers/experimental/__init__.py", + "keras/layers/experimental/preprocessing/__init__.py", "keras/losses/__init__.py", "keras/metrics/__init__.py", "keras/mixed_precision/__init__.py", diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.pbtxt new file mode 100644 index 00000000000000..7f6d81d297a09e --- /dev/null +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.pbtxt @@ -0,0 +1,7 @@ +path: "tensorflow.keras.layers.experimental" +tf_module { + member { + name: "preprocessing" + mtype: "" + } +} diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-normalization.__metaclass__.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-normalization.__metaclass__.pbtxt new file mode 100644 index 00000000000000..20bb9904d18d49 --- /dev/null +++ 
b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-normalization.__metaclass__.pbtxt @@ -0,0 +1,14 @@ +path: "tensorflow.keras.layers.experimental.preprocessing.Normalization.__metaclass__" +tf_class { + is_instance: "" + member_method { + name: "__init__" + } + member_method { + name: "mro" + } + member_method { + name: "register" + argspec: "args=[\'cls\', \'subclass\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-normalization.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-normalization.pbtxt new file mode 100644 index 00000000000000..59f759886c9015 --- /dev/null +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-normalization.pbtxt @@ -0,0 +1,225 @@ +path: "tensorflow.keras.layers.experimental.preprocessing.Normalization" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + member { + name: "activity_regularizer" + mtype: "" + } + member { + name: "dtype" + mtype: "" + } + member { + name: "dynamic" + mtype: "" + } + member { + name: "inbound_nodes" + mtype: "" + } + member { + name: "input" + mtype: "" + } + member { + name: "input_mask" + mtype: "" + } + member { + name: "input_shape" + mtype: "" + } + member { + name: "input_spec" + mtype: "" + } + member { + name: "losses" + mtype: "" + } + member { + name: "metrics" + mtype: "" + } + member { + name: "name" + mtype: "" + } + member { + name: "name_scope" + mtype: "" + } + member { + name: "non_trainable_variables" + mtype: "" + } + member { + name: "non_trainable_weights" + mtype: "" + } + member { + name: "outbound_nodes" + mtype: "" + } + member { + name: "output" + mtype: "" + } + member { + name: "output_mask" + mtype: "" + } + member { + name: 
"output_shape" + mtype: "" + } + member { + name: "stateful" + mtype: "" + } + member { + name: "submodules" + mtype: "" + } + member { + name: "trainable" + mtype: "" + } + member { + name: "trainable_variables" + mtype: "" + } + member { + name: "trainable_weights" + mtype: "" + } + member { + name: "updates" + mtype: "" + } + member { + name: "variables" + mtype: "" + } + member { + name: "weights" + mtype: "" + } + member_method { + name: "__init__" + argspec: "args=[\'self\', \'axis\', \'dtype\'], varargs=None, keywords=kwargs, defaults=[\'-1\', \'None\'], " + } + member_method { + name: "adapt" + argspec: "args=[\'self\', \'data\', \'reset_state\'], varargs=None, keywords=None, defaults=[\'True\'], " + } + member_method { + name: "add_loss" + argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "add_metric" + argspec: "args=[\'self\', \'value\', \'aggregation\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + } + member_method { + name: "add_update" + argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "add_variable" + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" + } + member_method { + name: "add_weight" + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + } + member_method { + name: "apply" + argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" + } + member_method { + name: "build" + argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" + } 
+ member_method { + name: "call" + argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "compute_mask" + argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "compute_output_shape" + argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "compute_output_signature" + argspec: "args=[\'self\', \'input_spec\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "count_params" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_input_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_input_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_input_shape_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_losses_for" + argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_output_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_output_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_output_shape_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_updates_for" + argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, 
defaults=None" + } + member_method { + name: "get_weights" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "set_weights" + argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "with_name_scope" + argspec: "args=[\'cls\', \'method\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-preprocessing-layer.__metaclass__.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-preprocessing-layer.__metaclass__.pbtxt new file mode 100644 index 00000000000000..ceebb69d16a6b0 --- /dev/null +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-preprocessing-layer.__metaclass__.pbtxt @@ -0,0 +1,14 @@ +path: "tensorflow.keras.layers.experimental.preprocessing.PreprocessingLayer.__metaclass__" +tf_class { + is_instance: "" + member_method { + name: "__init__" + } + member_method { + name: "mro" + } + member_method { + name: "register" + argspec: "args=[\'cls\', \'subclass\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-preprocessing-layer.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-preprocessing-layer.pbtxt new file mode 100644 index 00000000000000..cf939df37dced8 --- /dev/null +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-preprocessing-layer.pbtxt @@ -0,0 +1,221 @@ +path: "tensorflow.keras.layers.experimental.preprocessing.PreprocessingLayer" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + member { + name: "activity_regularizer" + mtype: "" + } + member { + name: "dtype" + mtype: "" + } + member { + name: "dynamic" + mtype: "" + } + 
member { + name: "inbound_nodes" + mtype: "" + } + member { + name: "input" + mtype: "" + } + member { + name: "input_mask" + mtype: "" + } + member { + name: "input_shape" + mtype: "" + } + member { + name: "input_spec" + mtype: "" + } + member { + name: "losses" + mtype: "" + } + member { + name: "metrics" + mtype: "" + } + member { + name: "name" + mtype: "" + } + member { + name: "name_scope" + mtype: "" + } + member { + name: "non_trainable_variables" + mtype: "" + } + member { + name: "non_trainable_weights" + mtype: "" + } + member { + name: "outbound_nodes" + mtype: "" + } + member { + name: "output" + mtype: "" + } + member { + name: "output_mask" + mtype: "" + } + member { + name: "output_shape" + mtype: "" + } + member { + name: "stateful" + mtype: "" + } + member { + name: "submodules" + mtype: "" + } + member { + name: "trainable" + mtype: "" + } + member { + name: "trainable_variables" + mtype: "" + } + member { + name: "trainable_weights" + mtype: "" + } + member { + name: "updates" + mtype: "" + } + member { + name: "variables" + mtype: "" + } + member { + name: "weights" + mtype: "" + } + member_method { + name: "__init__" + argspec: "args=[\'self\', \'trainable\', \'name\', \'dtype\', \'dynamic\'], varargs=None, keywords=kwargs, defaults=[\'True\', \'None\', \'None\', \'False\'], " + } + member_method { + name: "adapt" + argspec: "args=[\'self\', \'data\', \'reset_state\'], varargs=None, keywords=None, defaults=[\'True\'], " + } + member_method { + name: "add_loss" + argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "add_metric" + argspec: "args=[\'self\', \'value\', \'aggregation\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + } + member_method { + name: "add_update" + argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "add_variable" + argspec: 
"args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" + } + member_method { + name: "add_weight" + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + } + member_method { + name: "apply" + argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" + } + member_method { + name: "build" + argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "call" + argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=kwargs, defaults=None" + } + member_method { + name: "compute_mask" + argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "compute_output_shape" + argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "compute_output_signature" + argspec: "args=[\'self\', \'input_signature\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "count_params" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_input_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_input_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: 
"get_input_shape_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_losses_for" + argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_output_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_output_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_output_shape_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_updates_for" + argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_weights" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "set_weights" + argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "with_name_scope" + argspec: "args=[\'cls\', \'method\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-text-vectorization.__metaclass__.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-text-vectorization.__metaclass__.pbtxt new file mode 100644 index 00000000000000..fe45a5da03bddc --- /dev/null +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-text-vectorization.__metaclass__.pbtxt @@ -0,0 +1,14 @@ +path: "tensorflow.keras.layers.experimental.preprocessing.TextVectorization.__metaclass__" +tf_class { + is_instance: "" + member_method { + name: "__init__" + } + member_method { + name: "mro" + } + member_method { + name: "register" + argspec: "args=[\'cls\', \'subclass\'], varargs=None, keywords=None, defaults=None" + } +} diff --git 
a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-text-vectorization.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-text-vectorization.pbtxt new file mode 100644 index 00000000000000..d79b7d712f1266 --- /dev/null +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-text-vectorization.pbtxt @@ -0,0 +1,233 @@ +path: "tensorflow.keras.layers.experimental.preprocessing.TextVectorization" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + member { + name: "activity_regularizer" + mtype: "" + } + member { + name: "dtype" + mtype: "" + } + member { + name: "dynamic" + mtype: "" + } + member { + name: "inbound_nodes" + mtype: "" + } + member { + name: "input" + mtype: "" + } + member { + name: "input_mask" + mtype: "" + } + member { + name: "input_shape" + mtype: "" + } + member { + name: "input_spec" + mtype: "" + } + member { + name: "losses" + mtype: "" + } + member { + name: "metrics" + mtype: "" + } + member { + name: "name" + mtype: "" + } + member { + name: "name_scope" + mtype: "" + } + member { + name: "non_trainable_variables" + mtype: "" + } + member { + name: "non_trainable_weights" + mtype: "" + } + member { + name: "outbound_nodes" + mtype: "" + } + member { + name: "output" + mtype: "" + } + member { + name: "output_mask" + mtype: "" + } + member { + name: "output_shape" + mtype: "" + } + member { + name: "stateful" + mtype: "" + } + member { + name: "submodules" + mtype: "" + } + member { + name: "trainable" + mtype: "" + } + member { + name: "trainable_variables" + mtype: "" + } + member { + name: "trainable_weights" + mtype: "" + } + member { + name: "updates" + mtype: "" + } + member { + name: "variables" + mtype: "" + } + member { + name: "weights" + mtype: "" + } + member_method { + name: "__init__" 
+ argspec: "args=[\'self\', \'max_tokens\', \'standardize\', \'split\', \'ngrams\', \'output_mode\', \'output_sequence_length\', \'pad_to_max_tokens\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'lower_and_strip_punctuation\', \'whitespace\', \'None\', \'int\', \'None\', \'True\'], " + } + member_method { + name: "adapt" + argspec: "args=[\'self\', \'data\', \'reset_state\'], varargs=None, keywords=None, defaults=[\'True\'], " + } + member_method { + name: "add_loss" + argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "add_metric" + argspec: "args=[\'self\', \'value\', \'aggregation\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + } + member_method { + name: "add_update" + argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "add_variable" + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" + } + member_method { + name: "add_weight" + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + } + member_method { + name: "apply" + argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" + } + member_method { + name: "build" + argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "call" + argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "compute_mask" + argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + 
member_method { + name: "compute_output_shape" + argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "compute_output_signature" + argspec: "args=[\'self\', \'input_spec\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "count_params" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_input_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_input_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_input_shape_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_losses_for" + argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_output_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_output_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_output_shape_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_updates_for" + argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_vocabulary" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_weights" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + 
name: "set_vocabulary" + argspec: "args=[\'self\', \'vocab\', \'df_data\', \'oov_df_value\', \'append\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'False\'], " + } + member_method { + name: "set_weights" + argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "with_name_scope" + argspec: "args=[\'cls\', \'method\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.pbtxt new file mode 100644 index 00000000000000..abfd2c682de42c --- /dev/null +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.pbtxt @@ -0,0 +1,15 @@ +path: "tensorflow.keras.layers.experimental.preprocessing" +tf_module { + member { + name: "Normalization" + mtype: "" + } + member { + name: "PreprocessingLayer" + mtype: "" + } + member { + name: "TextVectorization" + mtype: "" + } +} diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.pbtxt index 603803595b6bc8..847cc814e0ff03 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.pbtxt @@ -416,6 +416,10 @@ tf_module { name: "ZeroPadding3D" mtype: "" } + member { + name: "experimental" + mtype: "" + } member_method { name: "Input" argspec: "args=[\'shape\', \'batch_size\', \'name\', \'dtype\', \'sparse\', \'tensor\', \'ragged\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'False\', \'None\', \'False\'], " diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.pbtxt new file mode 100644 index 00000000000000..7f6d81d297a09e --- /dev/null +++ 
b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.pbtxt @@ -0,0 +1,7 @@ +path: "tensorflow.keras.layers.experimental" +tf_module { + member { + name: "preprocessing" + mtype: "" + } +} diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-normalization.__metaclass__.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-normalization.__metaclass__.pbtxt new file mode 100644 index 00000000000000..20bb9904d18d49 --- /dev/null +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-normalization.__metaclass__.pbtxt @@ -0,0 +1,14 @@ +path: "tensorflow.keras.layers.experimental.preprocessing.Normalization.__metaclass__" +tf_class { + is_instance: "" + member_method { + name: "__init__" + } + member_method { + name: "mro" + } + member_method { + name: "register" + argspec: "args=[\'cls\', \'subclass\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-normalization.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-normalization.pbtxt new file mode 100644 index 00000000000000..0efa1a8f5af253 --- /dev/null +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-normalization.pbtxt @@ -0,0 +1,223 @@ +path: "tensorflow.keras.layers.experimental.preprocessing.Normalization" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + member { + name: "activity_regularizer" + mtype: "" + } + member { + name: "dtype" + mtype: "" + } + member { + name: "dynamic" + mtype: "" + } + member { + name: "inbound_nodes" + mtype: "" + } + member { + name: "input" + mtype: "" + } + member { + name: "input_mask" + mtype: "" + } + member { + name: "input_shape" + mtype: "" + } + member { + name: 
"input_spec" + mtype: "" + } + member { + name: "losses" + mtype: "" + } + member { + name: "metrics" + mtype: "" + } + member { + name: "name" + mtype: "" + } + member { + name: "name_scope" + mtype: "" + } + member { + name: "non_trainable_variables" + mtype: "" + } + member { + name: "non_trainable_weights" + mtype: "" + } + member { + name: "outbound_nodes" + mtype: "" + } + member { + name: "output" + mtype: "" + } + member { + name: "output_mask" + mtype: "" + } + member { + name: "output_shape" + mtype: "" + } + member { + name: "stateful" + mtype: "" + } + member { + name: "submodules" + mtype: "" + } + member { + name: "trainable" + mtype: "" + } + member { + name: "trainable_variables" + mtype: "" + } + member { + name: "trainable_weights" + mtype: "" + } + member { + name: "updates" + mtype: "" + } + member { + name: "variables" + mtype: "" + } + member { + name: "weights" + mtype: "" + } + member_method { + name: "__init__" + argspec: "args=[\'self\', \'axis\', \'dtype\'], varargs=None, keywords=kwargs, defaults=[\'-1\', \'None\'], " + } + member_method { + name: "adapt" + argspec: "args=[\'self\', \'data\', \'reset_state\'], varargs=None, keywords=None, defaults=[\'True\'], " + } + member_method { + name: "add_loss" + argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "add_metric" + argspec: "args=[\'self\', \'value\', \'aggregation\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + } + member_method { + name: "add_update" + argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "add_variable" + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" + } + member_method { + name: "add_weight" + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', 
\'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + } + member_method { + name: "apply" + argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" + } + member_method { + name: "build" + argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "call" + argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "compute_mask" + argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "compute_output_shape" + argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "compute_output_signature" + argspec: "args=[\'self\', \'input_spec\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "count_params" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_input_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_input_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_input_shape_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_losses_for" + argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: 
"get_output_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_output_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_output_shape_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_updates_for" + argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_weights" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "set_weights" + argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "with_name_scope" + argspec: "args=[\'cls\', \'method\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-preprocessing-layer.__metaclass__.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-preprocessing-layer.__metaclass__.pbtxt new file mode 100644 index 00000000000000..ceebb69d16a6b0 --- /dev/null +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-preprocessing-layer.__metaclass__.pbtxt @@ -0,0 +1,14 @@ +path: "tensorflow.keras.layers.experimental.preprocessing.PreprocessingLayer.__metaclass__" +tf_class { + is_instance: "" + member_method { + name: "__init__" + } + member_method { + name: "mro" + } + member_method { + name: "register" + argspec: "args=[\'cls\', \'subclass\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-preprocessing-layer.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-preprocessing-layer.pbtxt new file mode 100644 index 
00000000000000..cf939df37dced8 --- /dev/null +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-preprocessing-layer.pbtxt @@ -0,0 +1,221 @@ +path: "tensorflow.keras.layers.experimental.preprocessing.PreprocessingLayer" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + member { + name: "activity_regularizer" + mtype: "" + } + member { + name: "dtype" + mtype: "" + } + member { + name: "dynamic" + mtype: "" + } + member { + name: "inbound_nodes" + mtype: "" + } + member { + name: "input" + mtype: "" + } + member { + name: "input_mask" + mtype: "" + } + member { + name: "input_shape" + mtype: "" + } + member { + name: "input_spec" + mtype: "" + } + member { + name: "losses" + mtype: "" + } + member { + name: "metrics" + mtype: "" + } + member { + name: "name" + mtype: "" + } + member { + name: "name_scope" + mtype: "" + } + member { + name: "non_trainable_variables" + mtype: "" + } + member { + name: "non_trainable_weights" + mtype: "" + } + member { + name: "outbound_nodes" + mtype: "" + } + member { + name: "output" + mtype: "" + } + member { + name: "output_mask" + mtype: "" + } + member { + name: "output_shape" + mtype: "" + } + member { + name: "stateful" + mtype: "" + } + member { + name: "submodules" + mtype: "" + } + member { + name: "trainable" + mtype: "" + } + member { + name: "trainable_variables" + mtype: "" + } + member { + name: "trainable_weights" + mtype: "" + } + member { + name: "updates" + mtype: "" + } + member { + name: "variables" + mtype: "" + } + member { + name: "weights" + mtype: "" + } + member_method { + name: "__init__" + argspec: "args=[\'self\', \'trainable\', \'name\', \'dtype\', \'dynamic\'], varargs=None, keywords=kwargs, defaults=[\'True\', \'None\', \'None\', \'False\'], " + } + member_method { + name: "adapt" + argspec: "args=[\'self\', \'data\', \'reset_state\'], varargs=None, keywords=None, defaults=[\'True\'], " + } + 
member_method { + name: "add_loss" + argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "add_metric" + argspec: "args=[\'self\', \'value\', \'aggregation\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + } + member_method { + name: "add_update" + argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "add_variable" + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" + } + member_method { + name: "add_weight" + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + } + member_method { + name: "apply" + argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" + } + member_method { + name: "build" + argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "call" + argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=kwargs, defaults=None" + } + member_method { + name: "compute_mask" + argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "compute_output_shape" + argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "compute_output_signature" + argspec: "args=[\'self\', \'input_signature\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "count_params" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: 
"from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_input_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_input_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_input_shape_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_losses_for" + argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_output_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_output_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_output_shape_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_updates_for" + argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_weights" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "set_weights" + argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "with_name_scope" + argspec: "args=[\'cls\', \'method\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-text-vectorization.__metaclass__.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-text-vectorization.__metaclass__.pbtxt new file mode 100644 index 
00000000000000..fe45a5da03bddc --- /dev/null +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-text-vectorization.__metaclass__.pbtxt @@ -0,0 +1,14 @@ +path: "tensorflow.keras.layers.experimental.preprocessing.TextVectorization.__metaclass__" +tf_class { + is_instance: "" + member_method { + name: "__init__" + } + member_method { + name: "mro" + } + member_method { + name: "register" + argspec: "args=[\'cls\', \'subclass\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-text-vectorization.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-text-vectorization.pbtxt new file mode 100644 index 00000000000000..85fe8aec94bf8c --- /dev/null +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-text-vectorization.pbtxt @@ -0,0 +1,231 @@ +path: "tensorflow.keras.layers.experimental.preprocessing.TextVectorization" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + member { + name: "activity_regularizer" + mtype: "" + } + member { + name: "dtype" + mtype: "" + } + member { + name: "dynamic" + mtype: "" + } + member { + name: "inbound_nodes" + mtype: "" + } + member { + name: "input" + mtype: "" + } + member { + name: "input_mask" + mtype: "" + } + member { + name: "input_shape" + mtype: "" + } + member { + name: "input_spec" + mtype: "" + } + member { + name: "losses" + mtype: "" + } + member { + name: "metrics" + mtype: "" + } + member { + name: "name" + mtype: "" + } + member { + name: "name_scope" + mtype: "" + } + member { + name: "non_trainable_variables" + mtype: "" + } + member { + name: "non_trainable_weights" + mtype: "" + } + member { + name: "outbound_nodes" + mtype: "" + } + member { + name: "output" + mtype: "" + } + member { + name: 
"output_mask" + mtype: "" + } + member { + name: "output_shape" + mtype: "" + } + member { + name: "stateful" + mtype: "" + } + member { + name: "submodules" + mtype: "" + } + member { + name: "trainable" + mtype: "" + } + member { + name: "trainable_variables" + mtype: "" + } + member { + name: "trainable_weights" + mtype: "" + } + member { + name: "updates" + mtype: "" + } + member { + name: "variables" + mtype: "" + } + member { + name: "weights" + mtype: "" + } + member_method { + name: "__init__" + argspec: "args=[\'self\', \'max_tokens\', \'standardize\', \'split\', \'ngrams\', \'output_mode\', \'output_sequence_length\', \'pad_to_max_tokens\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'lower_and_strip_punctuation\', \'whitespace\', \'None\', \'int\', \'None\', \'True\'], " + } + member_method { + name: "adapt" + argspec: "args=[\'self\', \'data\', \'reset_state\'], varargs=None, keywords=None, defaults=[\'True\'], " + } + member_method { + name: "add_loss" + argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "add_metric" + argspec: "args=[\'self\', \'value\', \'aggregation\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + } + member_method { + name: "add_update" + argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "add_variable" + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" + } + member_method { + name: "add_weight" + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + } + member_method { + 
name: "apply" + argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None" + } + member_method { + name: "build" + argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "call" + argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "compute_mask" + argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "compute_output_shape" + argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "compute_output_signature" + argspec: "args=[\'self\', \'input_spec\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "count_params" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_input_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_input_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_input_shape_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_losses_for" + argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_output_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_output_mask_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: 
"get_output_shape_at" + argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_updates_for" + argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_vocabulary" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_weights" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "set_vocabulary" + argspec: "args=[\'self\', \'vocab\', \'df_data\', \'oov_df_value\', \'append\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'False\'], " + } + member_method { + name: "set_weights" + argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "with_name_scope" + argspec: "args=[\'cls\', \'method\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.pbtxt new file mode 100644 index 00000000000000..abfd2c682de42c --- /dev/null +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.pbtxt @@ -0,0 +1,15 @@ +path: "tensorflow.keras.layers.experimental.preprocessing" +tf_module { + member { + name: "Normalization" + mtype: "" + } + member { + name: "PreprocessingLayer" + mtype: "" + } + member { + name: "TextVectorization" + mtype: "" + } +} diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.pbtxt index 9f1b0dc41fdfde..5574cc9ca5918a 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.pbtxt @@ -408,6 +408,10 @@ tf_module { name: "ZeroPadding3D" mtype: "" } + member { + name: "experimental" + mtype: "" + } 
member_method { name: "Input" argspec: "args=[\'shape\', \'batch_size\', \'name\', \'dtype\', \'sparse\', \'tensor\', \'ragged\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'False\', \'None\', \'False\'], " From f8759b65139c36af4109272e8f18781ea11fca56 Mon Sep 17 00:00:00 2001 From: Taylor Robie Date: Fri, 8 Nov 2019 16:13:04 -0800 Subject: [PATCH 004/130] run Model.*_on_batch using tf.function where appropriate. PiperOrigin-RevId: 279412139 Change-Id: I3c55a5388b0cfd144d63f5e9f444cf3e96471ec4 --- .../python/keras/engine/base_layer_test.py | 12 +-- tensorflow/python/keras/engine/training.py | 7 +- .../python/keras/engine/training_v2_utils.py | 90 +++++++++++++++---- .../saving/saved_model/saved_model_test.py | 6 +- 4 files changed, 90 insertions(+), 25 deletions(-) diff --git a/tensorflow/python/keras/engine/base_layer_test.py b/tensorflow/python/keras/engine/base_layer_test.py index d7a005bad5b9d1..913464ae47948b 100644 --- a/tensorflow/python/keras/engine/base_layer_test.py +++ b/tensorflow/python/keras/engine/base_layer_test.py @@ -1009,12 +1009,14 @@ def call(self, inputs, training=None): 'mse', run_eagerly=testing_utils.should_run_eagerly(), experimental_run_tf_function=testing_utils.should_run_tf_function()) - _, train_metric = model.train_on_batch(np.ones((2, 3)), + for _ in range(3): + _, train_metric = model.train_on_batch(np.ones((2, 3)), + np.ones((2, 3))) + + self.assertEqual(train_metric, 2 * 3) + _, test_metric = model.test_on_batch(np.ones((2, 3)), np.ones((2, 3))) - self.assertEqual(train_metric, 2 * 3) - _, test_metric = model.test_on_batch(np.ones((2, 3)), - np.ones((2, 3))) - self.assertEqual(test_metric, 0) + self.assertEqual(test_metric, 0) def test_if_training_pattern_update(self): diff --git a/tensorflow/python/keras/engine/training.py b/tensorflow/python/keras/engine/training.py index b66c39d664a8fd..eef8ad84d548a3 100644 --- a/tensorflow/python/keras/engine/training.py +++ 
b/tensorflow/python/keras/engine/training.py @@ -1035,7 +1035,8 @@ class during training. This can be useful to tell the model to "pay if self._experimental_run_tf_function: outputs = training_v2_utils.train_on_batch( self, x, y=y, sample_weight=sample_weight, - class_weight=class_weight, reset_metrics=reset_metrics) + class_weight=class_weight, reset_metrics=reset_metrics, + standalone=True) outputs = (outputs['total_loss'] + outputs['output_losses'] + outputs['metrics']) outputs = [ @@ -1132,7 +1133,7 @@ def test_on_batch(self, x, y=None, sample_weight=None, reset_metrics=True): if self._experimental_run_tf_function: outputs = training_v2_utils.test_on_batch( self, x, y=y, sample_weight=sample_weight, - reset_metrics=reset_metrics) + reset_metrics=reset_metrics, standalone=True) outputs = (outputs['total_loss'] + outputs['output_losses'] + outputs['metrics']) outputs = [ @@ -1197,7 +1198,7 @@ def predict_on_batch(self, x): """ self._check_call_args('predict_on_batch') if self._experimental_run_tf_function: - return training_v2_utils.predict_on_batch(self, x) + return training_v2_utils.predict_on_batch(self, x, standalone=True) if (self._distribution_strategy and distribution_strategy_context.in_cross_replica_context()): diff --git a/tensorflow/python/keras/engine/training_v2_utils.py b/tensorflow/python/keras/engine/training_v2_utils.py index 665a4a2639189a..92db3adc8234fa 100644 --- a/tensorflow/python/keras/engine/training_v2_utils.py +++ b/tensorflow/python/keras/engine/training_v2_utils.py @@ -46,19 +46,27 @@ from tensorflow.python.util import nest -def _get_or_make_execution_function(model, mode): - """Makes or reuses function to run one step of distributed model execution.""" +def _get_or_make_function(model, mode, key_fn, make_fn): + """Helper function for managing cached execution functions.""" model._init_distributed_function_cache_if_not_compiled() + key = key_fn(mode) - # Use a key with 'v2' to distinguish from fall-back execution functions. 
- key = (mode, 'v2') - distributed_function = dist_utils.get_distributed_function(model, key) - if distributed_function: - return distributed_function + function = dist_utils.get_distributed_function(model, key) + if function: + return function - distribution_function = _make_execution_function(model, mode) - dist_utils.set_distributed_function(model, key, distribution_function) - return distribution_function + function = make_fn(model, mode) + dist_utils.set_distributed_function(model, key, function) + return function + + +def _get_or_make_execution_function(model, mode): + """Makes or reuses function to run one step of distributed model execution.""" + return _get_or_make_function( + model, mode, + # Use a key with 'v2' to distinguish from fall-back execution functions. + key_fn=lambda m: (m, 'v2'), + make_fn=_make_execution_function) def _make_execution_function(model, mode): @@ -92,6 +100,30 @@ def execution_function(input_fn): return execution_function +def _get_or_make_on_batch_function(model, mode): + """Makes or reuses function to run one step of distributed model execution.""" + return _get_or_make_function( + model, mode, + # Use a key with 'v2' to distinguish from fall-back execution functions. + key_fn=lambda m: (m, 'v2_on_batch'), + make_fn=_make_on_batch_function) + + +def _make_on_batch_function(model, mode): + """Creates a function of Model.*_on_batch methods.""" + if mode == ModeKeys.TRAIN: + func = training_eager.train_on_batch + elif mode == ModeKeys.TEST: + func = training_eager.test_on_batch + else: + func = model + + if not model.run_eagerly: + func = def_function.function(func) + + return func + + def _non_none_constant_value(v): constant_value = tensor_util.constant_value(v) return constant_value if constant_value is not None else v @@ -292,7 +324,8 @@ def train_on_batch( y=None, sample_weight=None, class_weight=None, - reset_metrics=True): + reset_metrics=True, + standalone=False): """Runs a single gradient update on a single batch of data. 
Arguments: @@ -324,6 +357,8 @@ class during training. This can be useful to tell the model to "pay reset_metrics: If `True`, the metrics returned will be only for this batch. If `False`, the metrics will be statefully accumulated across batches. + standalone: If True, this method is not called as part of + Model.fit/evaluate/predict and can therefore be tf.function'd. Returns: Scalar training loss @@ -348,7 +383,13 @@ class during training. This can be useful to tell the model to "pay # at this point because of the check above. `train_on_batch` is being run # for each replica by `model._distribution_strategy` and the same code path # as Eager is expected to be taken. - outputs = training_eager.train_on_batch( + + if standalone: + train_on_batch_fn = _get_or_make_on_batch_function(model, ModeKeys.TRAIN) + else: + train_on_batch_fn = training_eager.train_on_batch + + outputs = train_on_batch_fn( model, x, y, @@ -362,7 +403,8 @@ class during training. This can be useful to tell the model to "pay return outputs -def test_on_batch(model, x, y=None, sample_weight=None, reset_metrics=True): +def test_on_batch(model, x, y=None, sample_weight=None, reset_metrics=True, + standalone=False): """Test the model on a single batch of samples. Arguments: @@ -392,6 +434,8 @@ def test_on_batch(model, x, y=None, sample_weight=None, reset_metrics=True): reset_metrics: If `True`, the metrics returned will be only for this batch. If `False`, the metrics will be statefully accumulated across batches. + standalone: If True, this method is not called as part of + Model.fit/evaluate/predict and can therefore be tf.function'd. 
Returns: Scalar test loss (if the model has a single output and no metrics) @@ -411,7 +455,13 @@ def test_on_batch(model, x, y=None, sample_weight=None, reset_metrics=True): x, y, sample_weight=sample_weight, extract_tensors_from_dataset=True) batch_size = array_ops.shape(nest.flatten(x, expand_composites=True)[0])[0] - outputs = training_eager.test_on_batch( + + if standalone: + test_on_batch_fn = _get_or_make_on_batch_function(model, ModeKeys.TEST) + else: + test_on_batch_fn = training_eager.test_on_batch + + outputs = test_on_batch_fn( model, x, y, @@ -425,7 +475,7 @@ def test_on_batch(model, x, y=None, sample_weight=None, reset_metrics=True): return outputs -def predict_on_batch(model, x): +def predict_on_batch(model, x, standalone=False): """Returns predictions for a single batch of samples. Arguments: @@ -436,6 +486,8 @@ def predict_on_batch(model, x): - A TensorFlow tensor, or a list of tensors (in case the model has multiple inputs). - A `tf.data` dataset. + standalone: If True, this method is not called as part of + Model.fit/evaluate/predict and can therefore be tf.function'd. Returns: Numpy array(s) of predictions. 
@@ -458,5 +510,11 @@ def predict_on_batch(model, x): if len(inputs) == 1: inputs = inputs[0] + if standalone: + predict_on_batch_fn = _get_or_make_on_batch_function( + model, ModeKeys.PREDICT) + else: + predict_on_batch_fn = model + with backend.eager_learning_phase_scope(0): - return model(inputs) # pylint: disable=not-callable + return predict_on_batch_fn(inputs) # pylint: disable=not-callable diff --git a/tensorflow/python/keras/saving/saved_model/saved_model_test.py b/tensorflow/python/keras/saving/saved_model/saved_model_test.py index aa5886096bc771..abd1670c6157e1 100644 --- a/tensorflow/python/keras/saving/saved_model/saved_model_test.py +++ b/tensorflow/python/keras/saving/saved_model/saved_model_test.py @@ -200,7 +200,11 @@ def test_maintains_losses(self): saved_model_dir = self._save_model_dir() tf_save.save(model, saved_model_dir) - self.assertAllEqual(previous_losses, model.losses) + + with previous_losses[0].graph.as_default(): + # If we try to compare symbolic Tensors in eager mode assertAllEqual will + # return False even if they are the same Tensor. + self.assertAllEqual(previous_losses, model.losses) if context.executing_eagerly(): # Test that eager losses are maintained. From b22b9c9e0c52aece222282c50ca412701e4b826f Mon Sep 17 00:00:00 2001 From: Andrew Audibert Date: Fri, 8 Nov 2019 11:47:40 -0800 Subject: [PATCH 005/130] Fix flaky doctest. The doctest was using dictionary output to exhibit structured element usage. This caused a problem since dictionary key order is undefined. This changes the example to use a tuple instead. 
PiperOrigin-RevId: 279359482 Change-Id: Ideb9970935079b156f88f48672dea2d375d497ae --- tensorflow/python/data/ops/dataset_ops.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py index f49e0a65e3fac3..50f9514de11be0 100644 --- a/tensorflow/python/data/ops/dataset_ops.py +++ b/tensorflow/python/data/ops/dataset_ops.py @@ -1441,19 +1441,18 @@ def padded_batch(self, [[ 6 7 -1] [ 8 -1 -1]] >>> # Components of nested elements can be padded independently. - >>> elements = [{'v1': [1, 2, 3], 'v2': [10]}, - ... {'v1': [4, 5], 'v2': [11, 12]}] + >>> elements = [([1, 2, 3], [10]), + ... ([4, 5], [11, 12])] >>> dataset = tf.data.Dataset.from_generator( - ... lambda: iter(elements), {'v1': tf.int32, 'v2': tf.int32}) - >>> # Pad 'val1' to length 4, and 'val2' to the smallest size that fits. + ... lambda: iter(elements), (tf.int32, tf.int32)) + >>> # Pad the first component of the tuple to length 4, and the second + >>> # component to the smallest size that fits. >>> dataset = dataset.padded_batch(2, - ... padded_shapes={'v1': [4], 'v2': [None]}, - ... padding_values={'v1': -1, 'v2': 100}) + ... padded_shapes=([4], [None]), + ... padding_values=(-1, 100)) >>> list(dataset.as_numpy_iterator()) - [{'v1': array([[ 1, 2, 3, -1], - [ 4, 5, -1, -1]], dtype=int32), 'v2': array([[ 10, 100], - [ 11, 12]], dtype=int32)}] - + [(array([[ 1, 2, 3, -1], [ 4, 5, -1, -1]], dtype=int32), + array([[ 10, 100], [ 11, 12]], dtype=int32))] See also `tf.data.experimental.dense_to_sparse_batch`, which combines elements that may have different shapes into a `tf.SparseTensor`. From e8c1190a55e387cb805b3b2d054b6d89d662b230 Mon Sep 17 00:00:00 2001 From: Andrew Audibert Date: Mon, 11 Nov 2019 16:05:07 -0800 Subject: [PATCH 006/130] Fix bug in output ordering from interleave with num_parallel_calls != None. 
Before this fix, Dataset.interleave(..., num_parallel_calls=2) would result in non-deterministic output order, even when options.experimental_deterministic=True. The non-determinism is seen when the cycle length of the interleave exceeds the number of input elements. PiperOrigin-RevId: 279838573 Change-Id: I6dc94b071a8eeb269e24b192aa65be105022444c --- .../data/parallel_interleave_dataset_op.cc | 34 +++++++++++++------ .../parallel_interleave_dataset_op_test.cc | 32 ++++++++++++++++- 2 files changed, 54 insertions(+), 12 deletions(-) diff --git a/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc b/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc index 9e1237c133b362..d64c623565e356 100644 --- a/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc +++ b/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc @@ -261,7 +261,6 @@ class ParallelInterleaveDatasetOp::Dataset : public DatasetBase { if (num_parallel_calls_->value == model::kAutotune) { num_parallel_calls_->value = dataset()->cycle_length_; } - last_valid_current_element_ = dataset()->cycle_length_ - 1; ctx_ = std::make_unique(*ctx); TF_RETURN_IF_ERROR( dataset()->input_->MakeIterator(ctx, prefix(), &input_impl_)); @@ -437,10 +436,12 @@ class ParallelInterleaveDatasetOp::Dataset : public DatasetBase { if (!initial_elements_created_) { for (int i = 0; i < dataset()->cycle_length_; ++i) { current_elements_[i] = MakeElement(); - if (current_elements_[i]) { - current_elements_[i]->cycle_index = i; - elements_to_process_.push_back(i); + if (!current_elements_[i]) { + break; } + current_elements_[i]->cycle_index = i; + elements_to_process_.push_back(i); + last_valid_current_element_ = i; } initial_elements_created_ = true; } @@ -457,8 +458,9 @@ class ParallelInterleaveDatasetOp::Dataset : public DatasetBase { // Advances the position in the interleave cycle to the next cycle // element. 
void AdvanceToNextInCycle() EXCLUSIVE_LOCKS_REQUIRED(mu_) { + DCHECK_NE(last_valid_current_element_, -1); block_index_ = 0; - cycle_index_ = (cycle_index_ + 1) % dataset()->cycle_length_; + cycle_index_ = (cycle_index_ + 1) % (last_valid_current_element_ + 1); } // Advances the position in the interleave cycle by one. @@ -494,6 +496,10 @@ class ParallelInterleaveDatasetOp::Dataset : public DatasetBase { bool ConsumeHelper(std::shared_ptr* result) EXCLUSIVE_LOCKS_REQUIRED(mu_) { while (true) { + if (last_valid_current_element_ == -1) { + // Reached end of input. + return true; + } for (int64 i = 0; i < (last_valid_current_element_ + 1); ++i) { int64 index = (cycle_index_ + i) % (last_valid_current_element_ + 1); if (current_elements_[index]) { @@ -504,10 +510,7 @@ class ParallelInterleaveDatasetOp::Dataset : public DatasetBase { break; } } - if (!current_elements_[cycle_index_]) { - // Reached end of input. - return true; - } + DCHECK(current_elements_[cycle_index_]); std::shared_ptr element = current_elements_[cycle_index_]; if (!element->results.empty()) { // We found a result. @@ -551,9 +554,16 @@ class ParallelInterleaveDatasetOp::Dataset : public DatasetBase { while (last_valid_current_element_ >= 0 && !current_elements_[last_valid_current_element_]) { last_valid_current_element_--; + if (cycle_index_ > last_valid_current_element_) { + // We are about to move the cycle index below in + // AdvanceToNextInCycle(). + cycle_index_ = last_valid_current_element_; + } } } - AdvanceToNextInCycle(); + if (last_valid_current_element_ != -1) { + AdvanceToNextInCycle(); + } } } @@ -1152,7 +1162,7 @@ class ParallelInterleaveDatasetOp::Dataset : public DatasetBase { // TODO(aaudibert): Generalize this optimization by removing null elements // from `current_elements_`, e.g. by compacting the vector when x% of // its elements are null. 
- int64 last_valid_current_element_ GUARDED_BY(mu_); + int64 last_valid_current_element_ GUARDED_BY(mu_) = -1; const int per_iterator_prefetch_; const int future_elements_prefetch_; @@ -1208,6 +1218,8 @@ class ParallelInterleaveDatasetOp::Dataset : public DatasetBase { // Identifies position in the interleave cycle. int64 block_index_ GUARDED_BY(mu_) = 0; + // It is an invariant that either `last_valid_current_element_ == -1` or + // `cycle_index_ <= last_valid_current_element_`. int64 cycle_index_ GUARDED_BY(mu_) = 0; // Elements of the current interleave cycle. diff --git a/tensorflow/core/kernels/data/parallel_interleave_dataset_op_test.cc b/tensorflow/core/kernels/data/parallel_interleave_dataset_op_test.cc index 8b0bd0ce316d92..6517cac7799f94 100644 --- a/tensorflow/core/kernels/data/parallel_interleave_dataset_op_test.cc +++ b/tensorflow/core/kernels/data/parallel_interleave_dataset_op_test.cc @@ -364,6 +364,29 @@ ParallelInterleaveDatasetParams ParallelInterleaveDatasetParams10() { /*node_name=*/kNodeName); } +ParallelInterleaveDatasetParams LongCycleDeteriministicParams() { + auto tensor_slice_dataset_params = TensorSliceDatasetParams( + /*components=*/{CreateTensor( + TensorShape{3, 3, 1}, {"a", "b", "c", "d", "e", "f", "g", "h", "i"})}, + /*node_name=*/"tensor_slice"); + return ParallelInterleaveDatasetParams( + tensor_slice_dataset_params, + /*other_arguments=*/{}, + /*cycle_length=*/11, + /*block_length=*/1, + /*num_parallel_calls=*/2, + /*func=*/ + MakeTensorSliceDatasetFunc( + DataTypeVector({DT_STRING}), + std::vector({PartialTensorShape({1})})), + /*func_lib=*/{test::function::MakeTensorSliceDataset()}, + /*type_arguments=*/{}, + /*output_dtypes=*/{DT_STRING}, + /*output_shapes=*/{PartialTensorShape({1})}, + /*sloppy=*/false, + /*node_name=*/kNodeName); +} + // test case 11: cycle_length = 0, block_length = 1, num_parallel_calls = 2, // sloppy = true ParallelInterleaveDatasetParams @@ -504,7 +527,14 @@ GetNextTestCases() { CreateTensors( 
TensorShape{1}, {{"a"}, {"b"}, {"c"}, {"d"}, {"e"}, {"f"}, {"g"}, {"h"}, {"i"}}), - /*compare_order=*/false}}; + /*compare_order=*/false}, + {/*dataset_params=*/ + LongCycleDeteriministicParams(), + /*expected_outputs=*/ + CreateTensors( + TensorShape{1}, + {{"a"}, {"d"}, {"g"}, {"b"}, {"e"}, {"h"}, {"c"}, {"f"}, {"i"}}), + /*compare_order=*/true}}; } ITERATOR_GET_NEXT_TEST_P(ParallelInterleaveDatasetOpTest, From 5b2a7961632701f9516d7b8e8815bff9c7eba6c0 Mon Sep 17 00:00:00 2001 From: rxsang Date: Tue, 12 Nov 2019 10:37:32 -0800 Subject: [PATCH 007/130] Add XLA context to the function cache key --- tensorflow/python/eager/function.py | 46 ++++++++++++++++++++++++----- 1 file changed, 39 insertions(+), 7 deletions(-) diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py index 6c807e61746821..6cc2a5c0573243 100644 --- a/tensorflow/python/eager/function.py +++ b/tensorflow/python/eager/function.py @@ -54,6 +54,7 @@ from tensorflow.python.framework import tensor_shape from tensorflow.python.framework import tensor_spec from tensorflow.python.ops import array_ops +from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import custom_gradient from tensorflow.python.ops import default_gradient from tensorflow.python.ops import functional_ops @@ -125,8 +126,12 @@ def _make_input_signature_hashable(elem, variable_map=None): CacheKey = collections.namedtuple("CacheKey", [ - "input_signature", "parent_graph", "device_functions", "colocation_stack", - "in_cross_replica_context" + "input_signature", + "parent_graph", + "device_functions", + "colocation_stack", + "in_cross_replica_context", + "xla_context_id", ]) @@ -356,6 +361,23 @@ def _inference_name(n): return "__inference_%s_%s" % (n, ops.uid()) +def _enclosing_xla_context(): + """Returns the XLAControlFlowContext, which exists inside a tpu.rewrite().""" + graph = ops.get_default_graph() + while graph is not None: + # pylint: disable=protected-access + context_ = 
graph._get_control_flow_context() + # pylint: enable=protected-access + while context_ is not None: + if isinstance(context_, control_flow_ops.XLAControlFlowContext): + return context_ + context_ = context_.outer_context + # This may be a FuncGraph due to defuns or v2 control flow. We need to + # find the original graph with the XLAControlFlowContext. + graph = getattr(graph, "outer_graph", None) + return None + + class _EagerDefinedFunctionDeleter(object): """Unregister function from eager context.""" @@ -2511,6 +2533,10 @@ def _cache_key(self, args, kwargs, include_tensor_ranks_only=False): device_functions = (pydev.merge_device(ctx.device_name),) else: device_functions = () + + # We should not be in XLA context in eager mode. So always set + # `xla_context_id` to 0. + xla_context_id = 0 else: colocation_stack = tuple(default_graph._colocation_stack.peek_objs()) if (uses_distribution_strategy @@ -2521,6 +2547,14 @@ def _cache_key(self, args, kwargs, include_tensor_ranks_only=False): device_functions = tuple(default_graph._device_functions_outer_to_inner) else: device_functions = () + + # We want to force function retracing for each different + # XLAControlFlowContext, so add `xla_context_id` to the cache key. 
+ tpu_context = _enclosing_xla_context() + if tpu_context is not None: + xla_context_id = id(tpu_context) + else: + xla_context_id = 0 in_cross_replica_context = False try: @@ -2529,11 +2563,9 @@ def _cache_key(self, args, kwargs, include_tensor_ranks_only=False): pass return CacheKey( - _make_input_signature_hashable(input_signature), - parent_graph, - device_functions, - colocation_stack, - in_cross_replica_context) + _make_input_signature_hashable(input_signature), parent_graph, + device_functions, colocation_stack, in_cross_replica_context, + xla_context_id) def _create_graph_function(self, args, kwargs, override_flat_arg_shapes=None): """Create a `ConcreteFunction` from `args` and `kwargs`.""" From 7ead42a0f2b969baa570a1a827efc029138eb20d Mon Sep 17 00:00:00 2001 From: Jiri Simsa Date: Wed, 13 Nov 2019 08:12:28 -0800 Subject: [PATCH 008/130] [tf.data] Improving detection of infinitely repeated datasets in the presence of errors. PiperOrigin-RevId: 280200721 Change-Id: Icfaffb567b970da140e9b0d3a6c2093452893f01 --- tensorflow/core/framework/dataset.cc | 1 + tensorflow/core/framework/dataset.h | 6 ++++- .../core/kernels/data/shuffle_dataset_op.cc | 18 +++++++++------ .../python/data/kernel_tests/shuffle_test.py | 23 +++++++++++++++++++ tensorflow/python/eager/context.py | 6 +---- 5 files changed, 41 insertions(+), 13 deletions(-) diff --git a/tensorflow/core/framework/dataset.cc b/tensorflow/core/framework/dataset.cc index fc6f8fdbb9012a..261d930269545b 100644 --- a/tensorflow/core/framework/dataset.cc +++ b/tensorflow/core/framework/dataset.cc @@ -403,6 +403,7 @@ Status DatasetBaseIterator::GetNext(IteratorContext* ctx, bool* end_of_sequence) { profiler::TraceMe activity([&] { return BuildTraceMeName(); }, profiler::TraceMeLevel::kInfo); + VLOG(3) << prefix() << " GetNext"; RecordStart(ctx, /*stop_output=*/true); Status s = GetNextInternal(ctx, out_tensors, end_of_sequence); if (s.ok() && !*end_of_sequence) RecordElement(ctx); diff --git 
a/tensorflow/core/framework/dataset.h b/tensorflow/core/framework/dataset.h index 5a1fe974094a8e..2e5b23ffa53d26 100644 --- a/tensorflow/core/framework/dataset.h +++ b/tensorflow/core/framework/dataset.h @@ -815,9 +815,13 @@ class DatasetBaseIterator : public IteratorBase { explicit DatasetBaseIterator(const BaseParams& params) : params_(params) { params_.dataset->Ref(); + VLOG(3) << prefix() << " constructor"; } - ~DatasetBaseIterator() override { params_.dataset->Unref(); } + ~DatasetBaseIterator() override { + VLOG(3) << prefix() << " destructor"; + params_.dataset->Unref(); + } const DataTypeVector& output_dtypes() const override { return params_.dataset->output_dtypes(); diff --git a/tensorflow/core/kernels/data/shuffle_dataset_op.cc b/tensorflow/core/kernels/data/shuffle_dataset_op.cc index 6f3b939bac5db4..674467abedfa5b 100644 --- a/tensorflow/core/kernels/data/shuffle_dataset_op.cc +++ b/tensorflow/core/kernels/data/shuffle_dataset_op.cc @@ -54,6 +54,7 @@ const int64 kLogIntervalMicros = 10 * 1000000; // 10 seconds. 
const int64 kMaxEpochsInBuffer = 3; constexpr char kNumRandomSamples[] = "num_random_samples"; +constexpr char kDataProduced[] = "data_produced"; constexpr char kEndOfInputSequence[] = "end_of_input_sequence"; constexpr char kEpoch[] = "epoch"; constexpr char kNumElements[] = "num_elements"; @@ -138,9 +139,7 @@ class ShuffleDatasetOpBase::ShuffleDatasetBase : public DatasetBase { mutex_lock l(mu_); int64 start_micros = ctx->env()->NowMicros(); int64 num_log_entries = 0; - bool first_call = false; if (!input_impl_ && epoch_ == 0) { - first_call = true; TF_RETURN_IF_ERROR(this->dataset()->input_->MakeIterator( ctx, this->prefix(), &input_impl_)); } @@ -158,13 +157,12 @@ class ShuffleDatasetOpBase::ShuffleDatasetBase : public DatasetBase { TF_RETURN_IF_ERROR(input_impl_->GetNext(ctx, &input_element, &end_of_input_sequence)); if (!end_of_input_sequence) { - first_call = false; + data_produced_ = true; break; } - if (first_call && this->dataset()->count_ == -1) { - // If the first call to GetNext() fails because the end - // of sequence has been reached, we terminate the - // iteration immediately. (Otherwise, this iterator + if (!data_produced_ && this->dataset()->count_ == -1) { + // If we encounter the end of sequence without producing data, we + // terminate the iteration immediately. (Otherwise, this iterator // would loop infinitely and never produce a value.) 
*end_of_sequence = true; return Status::OK(); @@ -289,6 +287,10 @@ class ShuffleDatasetOpBase::ShuffleDatasetBase : public DatasetBase { } } } + if (data_produced_) { + TF_RETURN_IF_ERROR( + writer->WriteScalar(this->full_name(kDataProduced), "")); + } return Status::OK(); } @@ -353,6 +355,7 @@ class ShuffleDatasetOpBase::ShuffleDatasetBase : public DatasetBase { } } } + data_produced_ = reader->Contains(this->full_name(kDataProduced)); return Status::OK(); } @@ -394,6 +397,7 @@ class ShuffleDatasetOpBase::ShuffleDatasetBase : public DatasetBase { random::SingleSampleAdapter generator_ GUARDED_BY(mu_); int64 num_random_samples_ GUARDED_BY(mu_) = 0; + bool data_produced_ GUARDED_BY(mu_) = false; }; const DatasetBase* const input_; diff --git a/tensorflow/python/data/kernel_tests/shuffle_test.py b/tensorflow/python/data/kernel_tests/shuffle_test.py index b2d2d23a8fa871..7f801e1b5f4dfc 100644 --- a/tensorflow/python/data/kernel_tests/shuffle_test.py +++ b/tensorflow/python/data/kernel_tests/shuffle_test.py @@ -32,6 +32,7 @@ from tensorflow.python.framework import ops from tensorflow.python.framework import random_seed from tensorflow.python.ops import array_ops +from tensorflow.python.ops import check_ops from tensorflow.python.ops import variables from tensorflow.python.platform import test @@ -308,6 +309,28 @@ def consume(): consume() self.assertAllEqual(self.evaluate(counter_var), 10) + @combinations.generate(test_base.default_test_combinations()) + def testEmptyDataset(self): + dataset = dataset_ops.Dataset.from_tensors(1) + + def map_fn(x): + with ops.control_dependencies([check_ops.assert_equal(x, 0)]): + return x + + dataset = dataset.map(map_fn) + dataset = dataset.cache() + dataset = dataset.shuffle(buffer_size=10).repeat() + + get_next = self.getNext(dataset) + + # First time around, we get an error for the failed assertion. 
+ with self.assertRaises(errors.InvalidArgumentError): + self.evaluate(get_next()) + + # Second time around, we get an EOF because the cached dataset is empty. + with self.assertRaises(errors.OutOfRangeError): + self.evaluate(get_next()) + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/eager/context.py b/tensorflow/python/eager/context.py index 8de73bc35d14b4..7dce93066ed0bc 100644 --- a/tensorflow/python/eager/context.py +++ b/tensorflow/python/eager/context.py @@ -65,11 +65,7 @@ MIRRORING_NONE = pywrap_tensorflow.TFE_MIRRORING_NONE MIRRORING_ALL = pywrap_tensorflow.TFE_MIRRORING_ALL -# TODO(b/143164764): Currently _KEEP_ALIVE_SECS is set to a very long time -# (i.e. 30 days) because the server may deadlock when destroying the eager -# context. This may cause memory leak in the headless TPU case, we should change -# it back to 600 once the deadlock is fixed. -_KEEP_ALIVE_SECS = 2592000 +_KEEP_ALIVE_SECS = 600 _python_eager_context_create_counter = monitoring.Counter( "/tensorflow/api/python/eager_context_create_counter", From 664193ee3a6c230a480117a14559aefe769d34e8 Mon Sep 17 00:00:00 2001 From: Alex Stark Date: Wed, 13 Nov 2019 11:28:22 -0800 Subject: [PATCH 009/130] Ruy: Optimize (partial) of x86 AVX-512 8-bit pack. 
PiperOrigin-RevId: 280241069 Change-Id: I6cafe2b0e7e2dc13c098fd80780d9aa79d2a217b --- .../lite/experimental/ruy/pack_avx512.cc | 329 ++++++------------ 1 file changed, 105 insertions(+), 224 deletions(-) diff --git a/tensorflow/lite/experimental/ruy/pack_avx512.cc b/tensorflow/lite/experimental/ruy/pack_avx512.cc index 4c3504724750bd..0c1466048816b8 100644 --- a/tensorflow/lite/experimental/ruy/pack_avx512.cc +++ b/tensorflow/lite/experimental/ruy/pack_avx512.cc @@ -76,22 +76,6 @@ inline void ZeroHalf8bitAvx512(int src_rows, std::int8_t packed_zero_point, } } -inline __m512i LoaduTwo(const std::int8_t* addr_lo, - const std::int8_t* addr_hi) { - __m512i lower_filled = _mm512_castsi256_si512(_mm256_loadu_epi8(addr_lo)); - return _mm512_inserti32x8(lower_filled, _mm256_loadu_epi8(addr_hi), 1); -} - -inline __m512i MaskLoaduTwo(__mmask32 row_mask, const __m256i default_value_v, - const std::int8_t* addr_lo, - const std::int8_t* addr_hi) { - const __m512i lower_filled = _mm512_castsi256_si512( - _mm256_mask_loadu_epi8(default_value_v, row_mask, addr_lo)); - return _mm512_inserti32x8( - lower_filled, _mm256_mask_loadu_epi8(default_value_v, row_mask, addr_hi), - 1); -} - inline void HalfPack8bitAvx512(const std::int8_t* src_ptr, std::int8_t input_xor, const std::int8_t* zerobuf, int src_stride, @@ -99,13 +83,19 @@ inline void HalfPack8bitAvx512(const std::int8_t* src_ptr, std::int8_t* packed_ptr, std::int32_t* sums_ptr, std::int8_t* trailing_buf) { using Layout = PackImpl8bitAvx512::Layout; + static constexpr int kHalfLayoutCols = + PackImpl8bitAvx512::kHalfLayoutCols; // Half the number of cols in a + // block. RUY_DCHECK_EQ(Layout::kCols, 16); RUY_DCHECK_EQ(Layout::kRows, 4); + RUY_DCHECK_EQ(kHalfLayoutCols, 8); // Each Layout::Rows is 4 contiguous input, contiguous packed elements. // We process 8 of these chunks at a time, padding short input chunks. 
constexpr int kNumRowChunks = 8; constexpr int kNumChunkedSrcRows = kNumRowChunks * Layout::kRows; + std::int8_t in_data[kHalfLayoutCols][kNumRowChunks][Layout::kRows]; + const std::int8_t* src_ptr0 = src_ptr; const std::int8_t* src_ptr1 = src_ptr0 + src_stride; const std::int8_t* src_ptr2 = src_ptr1 + src_stride; @@ -164,8 +154,6 @@ inline void HalfPack8bitAvx512(const std::int8_t* src_ptr, sums_ptr[i] = 0; } } - __m512i sums_8x4_16bit = _mm512_set1_epi16(0); - std::int32_t sums_adjustment = 0; // The overall packing effectively pads the source rows to // (src_rows + 63) & ~63. The iteration over k may skip when m=1, and then we @@ -184,195 +172,111 @@ inline void HalfPack8bitAvx512(const std::int8_t* src_ptr, // treat each case separately. if (available_src_rows >= kNumChunkedSrcRows) { // i: chunks, s: Layout::Rows. - if (sums_ptr) { - __m512i t0, t1, t2, t3; - __m512i r0, r1, r2, r3; - const __m512i input_xor_v = _mm512_set1_epi8(input_xor); - - t0 = LoaduTwo(src_ptr0, src_ptr4); - t1 = LoaduTwo(src_ptr1, src_ptr5); - t2 = LoaduTwo(src_ptr2, src_ptr6); - t3 = LoaduTwo(src_ptr3, src_ptr7); - - r0 = _mm512_unpacklo_epi32(t0, t1); - r2 = _mm512_unpackhi_epi32(t0, t1); - r1 = _mm512_unpacklo_epi32(t2, t3); - r3 = _mm512_unpackhi_epi32(t2, t3); - - t0 = _mm512_unpacklo_epi64(r0, r1); - t2 = _mm512_unpackhi_epi64(r0, r1); - t1 = _mm512_unpacklo_epi64(r2, r3); - t3 = _mm512_unpackhi_epi64(r2, r3); - - r0 = _mm512_shuffle_i32x4(t0, t1, 0x88); - r1 = _mm512_shuffle_i32x4(t0, t1, 0xdd); - r2 = _mm512_shuffle_i32x4(t2, t3, 0x88); - r3 = _mm512_shuffle_i32x4(t2, t3, 0xdd); - - r0 = _mm512_xor_si512(r0, input_xor_v); - r1 = _mm512_xor_si512(r1, input_xor_v); - r2 = _mm512_xor_si512(r2, input_xor_v); - r3 = _mm512_xor_si512(r3, input_xor_v); - - const __m256i r0_0 = _mm512_castsi512_si256(r0); - const __m256i r0_1 = _mm512_extracti32x8_epi32(r0, 1); - const __m256i r1_0 = _mm512_castsi512_si256(r1); - const __m256i r1_1 = _mm512_extracti32x8_epi32(r1, 1); - const __m256i 
r2_0 = _mm512_castsi512_si256(r2); - const __m256i r2_1 = _mm512_extracti32x8_epi32(r2, 1); - const __m256i r3_0 = _mm512_castsi512_si256(r3); - const __m256i r3_1 = _mm512_extracti32x8_epi32(r3, 1); - sums_8x4_16bit = - _mm512_add_epi16(sums_8x4_16bit, _mm512_cvtepi8_epi16(r0_0)); - sums_8x4_16bit = - _mm512_add_epi16(sums_8x4_16bit, _mm512_cvtepi8_epi16(r0_1)); - sums_8x4_16bit = - _mm512_add_epi16(sums_8x4_16bit, _mm512_cvtepi8_epi16(r1_0)); - sums_8x4_16bit = - _mm512_add_epi16(sums_8x4_16bit, _mm512_cvtepi8_epi16(r1_1)); - sums_8x4_16bit = - _mm512_add_epi16(sums_8x4_16bit, _mm512_cvtepi8_epi16(r2_0)); - sums_8x4_16bit = - _mm512_add_epi16(sums_8x4_16bit, _mm512_cvtepi8_epi16(r2_1)); - sums_8x4_16bit = - _mm512_add_epi16(sums_8x4_16bit, _mm512_cvtepi8_epi16(r3_0)); - sums_8x4_16bit = - _mm512_add_epi16(sums_8x4_16bit, _mm512_cvtepi8_epi16(r3_1)); - _mm256_storeu_epi8(packed_ptr + 0 * 16 * 4, r0_0); - _mm256_storeu_epi8(packed_ptr + 2 * 16 * 4, r0_1); - _mm256_storeu_epi8(packed_ptr + 4 * 16 * 4, r1_0); - _mm256_storeu_epi8(packed_ptr + 6 * 16 * 4, r1_1); - _mm256_storeu_epi8(packed_ptr + 1 * 16 * 4, r2_0); - _mm256_storeu_epi8(packed_ptr + 3 * 16 * 4, r2_1); - _mm256_storeu_epi8(packed_ptr + 5 * 16 * 4, r3_0); - _mm256_storeu_epi8(packed_ptr + 7 * 16 * 4, r3_1); - } else { - __m512i t0, t1, t2, t3; - __m512i r0, r1, r2, r3; - const __m512i input_xor_v = _mm512_set1_epi8(input_xor); - - t0 = LoaduTwo(src_ptr0, src_ptr4); - t1 = LoaduTwo(src_ptr1, src_ptr5); - t2 = LoaduTwo(src_ptr2, src_ptr6); - t3 = LoaduTwo(src_ptr3, src_ptr7); - - r0 = _mm512_unpacklo_epi32(t0, t1); - r2 = _mm512_unpackhi_epi32(t0, t1); - r1 = _mm512_unpacklo_epi32(t2, t3); - r3 = _mm512_unpackhi_epi32(t2, t3); - - t0 = _mm512_unpacklo_epi64(r0, r1); - t2 = _mm512_unpackhi_epi64(r0, r1); - t1 = _mm512_unpacklo_epi64(r2, r3); - t3 = _mm512_unpackhi_epi64(r2, r3); - - r0 = _mm512_shuffle_i32x4(t0, t1, 0x88); - r1 = _mm512_shuffle_i32x4(t0, t1, 0xdd); - r2 = _mm512_shuffle_i32x4(t2, t3, 0x88); 
- r3 = _mm512_shuffle_i32x4(t2, t3, 0xdd); - - r0 = _mm512_xor_si512(r0, input_xor_v); - r1 = _mm512_xor_si512(r1, input_xor_v); - r2 = _mm512_xor_si512(r2, input_xor_v); - r3 = _mm512_xor_si512(r3, input_xor_v); - - const __m256i r0_0 = _mm512_castsi512_si256(r0); - const __m256i r0_1 = _mm512_extracti32x8_epi32(r0, 1); - const __m256i r1_0 = _mm512_castsi512_si256(r1); - const __m256i r1_1 = _mm512_extracti32x8_epi32(r1, 1); - const __m256i r2_0 = _mm512_castsi512_si256(r2); - const __m256i r2_1 = _mm512_extracti32x8_epi32(r2, 1); - const __m256i r3_0 = _mm512_castsi512_si256(r3); - const __m256i r3_1 = _mm512_extracti32x8_epi32(r3, 1); - _mm256_storeu_epi8(packed_ptr + 0 * 16 * 4, r0_0); - _mm256_storeu_epi8(packed_ptr + 2 * 16 * 4, r0_1); - _mm256_storeu_epi8(packed_ptr + 4 * 16 * 4, r1_0); - _mm256_storeu_epi8(packed_ptr + 6 * 16 * 4, r1_1); - _mm256_storeu_epi8(packed_ptr + 1 * 16 * 4, r2_0); - _mm256_storeu_epi8(packed_ptr + 3 * 16 * 4, r2_1); - _mm256_storeu_epi8(packed_ptr + 5 * 16 * 4, r3_0); - _mm256_storeu_epi8(packed_ptr + 7 * 16 * 4, r3_1); + for (int i = 0; i < 8; ++i) { + for (int s = 0; s < 4; ++s) { + in_data[0][i][s] = src_ptr0[i * 4 + s]; + in_data[1][i][s] = src_ptr1[i * 4 + s]; + in_data[2][i][s] = src_ptr2[i * 4 + s]; + in_data[3][i][s] = src_ptr3[i * 4 + s]; + in_data[4][i][s] = src_ptr4[i * 4 + s]; + in_data[5][i][s] = src_ptr5[i * 4 + s]; + in_data[6][i][s] = src_ptr6[i * 4 + s]; + in_data[7][i][s] = src_ptr7[i * 4 + s]; + } + } + // i: chunks, j: kHalfLayoutCols, s: Layout::Rows. 
+ for (int i = 0; i < 8; ++i) { + for (int j = 0; j < 8; ++j) { + for (int s = 0; s < 4; ++s) { + // 16 * 4 * i is offset for each block, that is + // (Layout::kCols * Layout::kRows * i) + packed_ptr[(16 * i + j) * 4 + s] = in_data[j][i][s] ^ input_xor; + } + if (sums_ptr) { + for (int s = 0; s < 4; ++s) { + sums_ptr[j] += in_data[j][i][s] ^ input_xor; + } + } + } } } else if (available_src_rows > 0) { RUY_DCHECK_LT(available_src_rows >> 2, kNumChunkedSrcRows); - const __mmask32 row_mask = - (static_cast(1) << available_src_rows) - 1; - + int i = 0; + // Consume chunks of 4 rows that are complete. + for (; i < (available_src_rows >> 2); ++i) { + for (int s = 0; s < 4; ++s) { + in_data[0][i][s] = src_ptr0[i * 4 + s]; + in_data[1][i][s] = src_ptr1[i * 4 + s]; + in_data[2][i][s] = src_ptr2[i * 4 + s]; + in_data[3][i][s] = src_ptr3[i * 4 + s]; + in_data[4][i][s] = src_ptr4[i * 4 + s]; + in_data[5][i][s] = src_ptr5[i * 4 + s]; + in_data[6][i][s] = src_ptr6[i * 4 + s]; + in_data[7][i][s] = src_ptr7[i * 4 + s]; + } + } + // Consume any incomplete chunk. + if (i < ((available_src_rows + 3) >> 2)) { + int s = 0; + for (; s < (available_src_rows & 3); ++s) { + in_data[0][i][s] = src_ptr0[i * 4 + s]; + in_data[1][i][s] = src_ptr1[i * 4 + s]; + in_data[2][i][s] = src_ptr2[i * 4 + s]; + in_data[3][i][s] = src_ptr3[i * 4 + s]; + in_data[4][i][s] = src_ptr4[i * 4 + s]; + in_data[5][i][s] = src_ptr5[i * 4 + s]; + in_data[6][i][s] = src_ptr6[i * 4 + s]; + in_data[7][i][s] = src_ptr7[i * 4 + s]; + } + RUY_DCHECK_LE(s, 4); + for (; s < 4; ++s) { + // j: kHalfLayoutCols. + for (int j = 0; j < 8; ++j) { + in_data[j][i][s] = zero_point; + } + } + ++i; + } // We do not care what goes into the trailing buffer, but we want // in_data[...] ^ input_xor == 0 for irrelevant values in the summation. 
// - // We compensate for padding-with-zero_point by initializing the - // summations with the compensating offset, effectively - // ((input_xor ^ input_xor) - (zero_point ^ input_xor)) * + // It might prove better in optimized code to pad uniformly with + // zero_point, and compensate by initializing the summations with the + // compensating offset, effectively + // ((input_xor - zero_point) ^ input_xor) * // 4 * (8 - ((available_src_rows + 3) >> 2)). + for (; i < 8; ++i) { + for (int s = 0; s < 4; ++s) { + for (int j = 0; j < 8; ++j) { + in_data[j][i][s] = input_xor; + } + } + } + // We loop through [0, 8) rather than + // [0, (available_src_rows + 3) >> 2), since that emulates what we might + // do in fully-optimized code. // - // Note that (zero_point ^ input_xor) is performed in 8-bits and then - // cast. - sums_adjustment += -(zero_point ^ input_xor) * 4 * - (8 - ((available_src_rows + 3) >> 2)); - - __m512i t0, t1, t2, t3; - __m512i r0, r1, r2, r3; - const __m512i input_xor_v = _mm512_set1_epi8(input_xor); - const __m256i zero_point_v = _mm256_set1_epi8(zero_point); - - t0 = MaskLoaduTwo(row_mask, zero_point_v, src_ptr0, src_ptr4); - t1 = MaskLoaduTwo(row_mask, zero_point_v, src_ptr1, src_ptr5); - t2 = MaskLoaduTwo(row_mask, zero_point_v, src_ptr2, src_ptr6); - t3 = MaskLoaduTwo(row_mask, zero_point_v, src_ptr3, src_ptr7); - - r0 = _mm512_unpacklo_epi32(t0, t1); - r2 = _mm512_unpackhi_epi32(t0, t1); - r1 = _mm512_unpacklo_epi32(t2, t3); - r3 = _mm512_unpackhi_epi32(t2, t3); - - t0 = _mm512_unpacklo_epi64(r0, r1); - t2 = _mm512_unpackhi_epi64(r0, r1); - t1 = _mm512_unpacklo_epi64(r2, r3); - t3 = _mm512_unpackhi_epi64(r2, r3); - - r0 = _mm512_shuffle_i32x4(t0, t1, 0x88); - r1 = _mm512_shuffle_i32x4(t0, t1, 0xdd); - r2 = _mm512_shuffle_i32x4(t2, t3, 0x88); - r3 = _mm512_shuffle_i32x4(t2, t3, 0xdd); - - r0 = _mm512_xor_si512(r0, input_xor_v); - r1 = _mm512_xor_si512(r1, input_xor_v); - r2 = _mm512_xor_si512(r2, input_xor_v); - r3 = _mm512_xor_si512(r3, 
input_xor_v); - - const __m256i r0_0 = _mm512_castsi512_si256(r0); - const __m256i r0_1 = _mm512_extracti32x8_epi32(r0, 1); - const __m256i r1_0 = _mm512_castsi512_si256(r1); - const __m256i r1_1 = _mm512_extracti32x8_epi32(r1, 1); - const __m256i r2_0 = _mm512_castsi512_si256(r2); - const __m256i r2_1 = _mm512_extracti32x8_epi32(r2, 1); - const __m256i r3_0 = _mm512_castsi512_si256(r3); - const __m256i r3_1 = _mm512_extracti32x8_epi32(r3, 1); - sums_8x4_16bit = - _mm512_add_epi16(sums_8x4_16bit, _mm512_cvtepi8_epi16(r0_0)); - sums_8x4_16bit = - _mm512_add_epi16(sums_8x4_16bit, _mm512_cvtepi8_epi16(r0_1)); - sums_8x4_16bit = - _mm512_add_epi16(sums_8x4_16bit, _mm512_cvtepi8_epi16(r1_0)); - sums_8x4_16bit = - _mm512_add_epi16(sums_8x4_16bit, _mm512_cvtepi8_epi16(r1_1)); - sums_8x4_16bit = - _mm512_add_epi16(sums_8x4_16bit, _mm512_cvtepi8_epi16(r2_0)); - sums_8x4_16bit = - _mm512_add_epi16(sums_8x4_16bit, _mm512_cvtepi8_epi16(r2_1)); - sums_8x4_16bit = - _mm512_add_epi16(sums_8x4_16bit, _mm512_cvtepi8_epi16(r3_0)); - sums_8x4_16bit = - _mm512_add_epi16(sums_8x4_16bit, _mm512_cvtepi8_epi16(r3_1)); - _mm256_storeu_epi8(trailing_buf + 0 * 16 * 4, r0_0); - _mm256_storeu_epi8(trailing_buf + 2 * 16 * 4, r0_1); - _mm256_storeu_epi8(trailing_buf + 4 * 16 * 4, r1_0); - _mm256_storeu_epi8(trailing_buf + 6 * 16 * 4, r1_1); - _mm256_storeu_epi8(trailing_buf + 1 * 16 * 4, r2_0); - _mm256_storeu_epi8(trailing_buf + 3 * 16 * 4, r2_1); - _mm256_storeu_epi8(trailing_buf + 5 * 16 * 4, r3_0); - _mm256_storeu_epi8(trailing_buf + 7 * 16 * 4, r3_1); + // i: chunks, j: kHalfLayoutCols, s: Layout::Rows. 
+ if (sums_ptr) { + for (int i = 0; i < 8; ++i) { + for (int j = 0; j < 8; ++j) { + for (int s = 0; s < 4; ++s) { + trailing_buf[(16 * i + j) * 4 + s] = + in_data[j][i][s] ^ input_xor; + sums_ptr[j] = sums_ptr[j] + (in_data[j][i][s] ^ input_xor); + } + } + } + } else { + for (int i = 0; i < 8; ++i) { + for (int j = 0; j < 8; ++j) { + for (int s = 0; s < 4; ++s) { + trailing_buf[(16 * i + j) * 4 + s] = + in_data[j][i][s] ^ input_xor; + } + } + } + } } packed_ptr += 16 * kNumChunkedSrcRows; @@ -386,39 +290,16 @@ inline void HalfPack8bitAvx512(const std::int8_t* src_ptr, src_ptr7 += src_inc7; } } - - if (sums_ptr) { - const __m256i sums_adjustment_v = _mm256_set1_epi32(sums_adjustment); - - __m256i sums = _mm256_loadu_epi32(sums_ptr); - const __m512i ones_16bit = _mm512_set1_epi16(1); - const __m512i idx = - _mm512_set_epi32(15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0); - - // The sums have been performed across columns, and now we have 4x16-bit - // sums packed together. We use madd for pairwise 32-bit sums, then we - // deinterlace the neighbours, finshing up by adding them to the stored - // accumulated sums. 
- const __m512i sums_8x2_32bit = - _mm512_madd_epi16(sums_8x4_16bit, ones_16bit); - const __m512i sums_2x8_32bit = - _mm512_permutexvar_epi32(idx, sums_8x2_32bit); - sums = _mm256_add_epi32(sums, sums_adjustment_v); - sums = _mm256_add_epi32(sums, _mm512_castsi512_si256(sums_2x8_32bit)); - sums = _mm256_add_epi32(sums, _mm512_extracti32x8_epi32(sums_2x8_32bit, 1)); - - _mm256_storeu_epi32(sums_ptr, sums); - } } inline __m512 LoaduTwo(const float* addr_lo, const float* addr_hi) { - const __m512 lower_filled = _mm512_castps256_ps512(_mm256_loadu_ps(addr_lo)); + __m512 lower_filled = _mm512_castps256_ps512(_mm256_loadu_ps(addr_lo)); return _mm512_insertf32x8(lower_filled, _mm256_loadu_ps(addr_hi), 1); } inline __m512 MaskLoaduTwo(__mmask8 row_mask, const float* addr_lo, const float* addr_hi) { - const __m512 lower_filled = + __m512 lower_filled = _mm512_castps256_ps512(_mm256_maskz_loadu_ps(row_mask, addr_lo)); return _mm512_insertf32x8(lower_filled, _mm256_maskz_loadu_ps(row_mask, addr_hi), 1); From 1c52525b0e3caf0fd6399de9f9e2e8d3f562efd3 Mon Sep 17 00:00:00 2001 From: Alex Stark Date: Wed, 13 Nov 2019 11:28:49 -0800 Subject: [PATCH 010/130] Ruy: Further optimization (partial) of x86 AVX2 8-bit pack. 
PiperOrigin-RevId: 280241172 Change-Id: I7e641a9ff00086f67e5e93676764799d44ef96d6 --- tensorflow/lite/experimental/ruy/pack_avx2.cc | 215 ++++++------------ 1 file changed, 71 insertions(+), 144 deletions(-) diff --git a/tensorflow/lite/experimental/ruy/pack_avx2.cc b/tensorflow/lite/experimental/ruy/pack_avx2.cc index 95f39ca5b85fde..ad71a0860ea2f4 100644 --- a/tensorflow/lite/experimental/ruy/pack_avx2.cc +++ b/tensorflow/lite/experimental/ruy/pack_avx2.cc @@ -60,25 +60,6 @@ using PackImplFloatAvx2 = namespace { -inline __m256i MaskLoadu(int available_src_rows, std::int8_t zero_point, - const std::int8_t* addr) { - RUY_DCHECK_LT(available_src_rows, 32); - __m256i padded_data; - - if (available_src_rows >= 16) { - __m128i load_hi = _mm_set1_epi8(zero_point); - __m128i load_lo = _mm_loadu_si128(reinterpret_cast(addr)); - memcpy(&load_hi, addr + 16, available_src_rows - 16); - padded_data = _mm256_set_m128i(load_hi, load_lo); - } else { - __m128i load_hi = _mm_set1_epi8(zero_point); - __m128i load_lo = load_hi; - memcpy(&load_lo, addr, available_src_rows); - padded_data = _mm256_set_m128i(load_hi, load_lo); - } - return padded_data; -} - inline void Pack8bitAvx2Packer(const std::int8_t* src_ptr, std::int8_t input_xor, const std::int8_t* zerobuf, int src_stride, @@ -93,6 +74,8 @@ inline void Pack8bitAvx2Packer(const std::int8_t* src_ptr, constexpr int kNumRowChunks = 8; constexpr int kNumChunkedSrcRows = kNumRowChunks * Layout::kRows; + std::int8_t in_data[Layout::kCols][kNumRowChunks][Layout::kRows]; + const std::int8_t* src_ptr0 = src_ptr; const std::int8_t* src_ptr1 = src_ptr0 + src_stride; const std::int8_t* src_ptr2 = src_ptr1 + src_stride; @@ -153,7 +136,6 @@ inline void Pack8bitAvx2Packer(const std::int8_t* src_ptr, } __m256i sums_8x4_16bit_lo = _mm256_set1_epi16(0); __m256i sums_8x4_16bit_hi = _mm256_set1_epi16(0); - std::int32_t sums_adjustment = 0; // The overall packing effectively pads the source rows to // (src_rows + 63) & ~63. 
The iteration over k may skip when m=1, and then we @@ -364,132 +346,80 @@ inline void Pack8bitAvx2Packer(const std::int8_t* src_ptr, } } else if (available_src_rows > 0) { RUY_DCHECK_LT(available_src_rows, kNumChunkedSrcRows); - + int i = 0; + // Consume chunks of 4 rows that are complete. + for (; i < (available_src_rows >> 2); ++i) { + for (int s = 0; s < 4; ++s) { + in_data[0][i][s] = src_ptr0[i * 4 + s]; + in_data[1][i][s] = src_ptr1[i * 4 + s]; + in_data[2][i][s] = src_ptr2[i * 4 + s]; + in_data[3][i][s] = src_ptr3[i * 4 + s]; + in_data[4][i][s] = src_ptr4[i * 4 + s]; + in_data[5][i][s] = src_ptr5[i * 4 + s]; + in_data[6][i][s] = src_ptr6[i * 4 + s]; + in_data[7][i][s] = src_ptr7[i * 4 + s]; + } + } + // Consume any incomplete chunk. + if (i < ((available_src_rows + 3) >> 2)) { + int s = 0; + for (; s < (available_src_rows & 3); ++s) { + in_data[0][i][s] = src_ptr0[i * 4 + s]; + in_data[1][i][s] = src_ptr1[i * 4 + s]; + in_data[2][i][s] = src_ptr2[i * 4 + s]; + in_data[3][i][s] = src_ptr3[i * 4 + s]; + in_data[4][i][s] = src_ptr4[i * 4 + s]; + in_data[5][i][s] = src_ptr5[i * 4 + s]; + in_data[6][i][s] = src_ptr6[i * 4 + s]; + in_data[7][i][s] = src_ptr7[i * 4 + s]; + } + RUY_DCHECK_LE(s, 4); + for (; s < 4; ++s) { + // j: Layout::kCols. + for (int j = 0; j < 8; ++j) { + in_data[j][i][s] = zero_point; + } + } + ++i; + } // We do not care what goes into the trailing buffer, but we want // in_data[...] ^ input_xor == 0 for irrelevant values in the summation. // - // We compensate for padding-with-zero_point by initializing the - // summations with the compensating offset, effectively - // ((input_xor ^ input_xor) - (zero_point ^ input_xor)) * + // It might prove better in optimized code to pad uniformly with + // zero_point, and compensate by initializing the summations with the + // compensating offset, effectively + // ((input_xor - zero_point) ^ input_xor) * // 4 * (8 - ((available_src_rows + 3) >> 2)). 
+ for (; i < 8; ++i) { + for (int s = 0; s < 4; ++s) { + for (int j = 0; j < 8; ++j) { + in_data[j][i][s] = input_xor; + } + } + } + // We loop through [0, 8) rather than + // [0, (available_src_rows + 3) >> 2), since that emulates what we might + // do in fully-optimized code. // - // Note that (zero_point ^ input_xor) is performed in 8-bits and then - // cast. - sums_adjustment += - -(zero_point ^ input_xor) * 4 * (8 - ((available_src_rows + 3) >> 2)); - - __m256i t0, t1, t2, t3, t4, t5, t6, t7; - __m256i r0, r1, r2, r3, r4, r5, r6, r7; - const __m256i input_xor_v = _mm256_set1_epi8(input_xor); - - t0 = MaskLoadu(available_src_rows, zero_point, src_ptr0); - t4 = MaskLoadu(available_src_rows, zero_point, src_ptr4); - t1 = MaskLoadu(available_src_rows, zero_point, src_ptr1); - t5 = MaskLoadu(available_src_rows, zero_point, src_ptr5); - t2 = MaskLoadu(available_src_rows, zero_point, src_ptr2); - t6 = MaskLoadu(available_src_rows, zero_point, src_ptr6); - t3 = MaskLoadu(available_src_rows, zero_point, src_ptr3); - t7 = MaskLoadu(available_src_rows, zero_point, src_ptr7); - - r0 = _mm256_unpacklo_epi32(t0, t1); - r4 = _mm256_unpacklo_epi32(t4, t5); - r2 = _mm256_unpackhi_epi32(t0, t1); - r6 = _mm256_unpackhi_epi32(t4, t5); - r1 = _mm256_unpacklo_epi32(t2, t3); - r5 = _mm256_unpacklo_epi32(t6, t7); - r3 = _mm256_unpackhi_epi32(t2, t3); - r7 = _mm256_unpackhi_epi32(t6, t7); - - t0 = _mm256_unpacklo_epi64(r0, r1); - t4 = _mm256_unpacklo_epi64(r4, r5); - t2 = _mm256_unpackhi_epi64(r0, r1); - t6 = _mm256_unpackhi_epi64(r4, r5); - t1 = _mm256_unpacklo_epi64(r2, r3); - t5 = _mm256_unpacklo_epi64(r6, r7); - t3 = _mm256_unpackhi_epi64(r2, r3); - t7 = _mm256_unpackhi_epi64(r6, r7); - - // The preceding sets of rearrangement operations interleaved by 4 bytes - // and then by 8 bytes *within* lanes. The following set interleave by - // 16 bytes (128-bit), operating *between* AVX lanes. For instance (t0, - // t4) are interleaved to create (r0, r1). 
This complexity follows from - // the way that AVX is centered around MM 128-bit lanes. - r0 = _mm256_permute2x128_si256(t0, t4, 0x20); - r4 = _mm256_permute2x128_si256(t1, t5, 0x20); - r1 = _mm256_permute2x128_si256(t0, t4, 0x31); - r5 = _mm256_permute2x128_si256(t1, t5, 0x31); - r2 = _mm256_permute2x128_si256(t2, t6, 0x20); - r6 = _mm256_permute2x128_si256(t3, t7, 0x20); - r3 = _mm256_permute2x128_si256(t2, t6, 0x31); - r7 = _mm256_permute2x128_si256(t3, t7, 0x31); - - r0 = _mm256_xor_si256(r0, input_xor_v); - r1 = _mm256_xor_si256(r1, input_xor_v); - r2 = _mm256_xor_si256(r2, input_xor_v); - r3 = _mm256_xor_si256(r3, input_xor_v); - r4 = _mm256_xor_si256(r4, input_xor_v); - r5 = _mm256_xor_si256(r5, input_xor_v); - r6 = _mm256_xor_si256(r6, input_xor_v); - r7 = _mm256_xor_si256(r7, input_xor_v); - - sums_8x4_16bit_lo = _mm256_add_epi16( - sums_8x4_16bit_lo, _mm256_cvtepi8_epi16(_mm256_castsi256_si128(r0))); - sums_8x4_16bit_lo = _mm256_add_epi16( - sums_8x4_16bit_lo, _mm256_cvtepi8_epi16(_mm256_castsi256_si128(r1))); - sums_8x4_16bit_lo = _mm256_add_epi16( - sums_8x4_16bit_lo, _mm256_cvtepi8_epi16(_mm256_castsi256_si128(r2))); - sums_8x4_16bit_lo = _mm256_add_epi16( - sums_8x4_16bit_lo, _mm256_cvtepi8_epi16(_mm256_castsi256_si128(r3))); - sums_8x4_16bit_lo = _mm256_add_epi16( - sums_8x4_16bit_lo, _mm256_cvtepi8_epi16(_mm256_castsi256_si128(r4))); - sums_8x4_16bit_lo = _mm256_add_epi16( - sums_8x4_16bit_lo, _mm256_cvtepi8_epi16(_mm256_castsi256_si128(r5))); - sums_8x4_16bit_lo = _mm256_add_epi16( - sums_8x4_16bit_lo, _mm256_cvtepi8_epi16(_mm256_castsi256_si128(r6))); - sums_8x4_16bit_lo = _mm256_add_epi16( - sums_8x4_16bit_lo, _mm256_cvtepi8_epi16(_mm256_castsi256_si128(r7))); - - sums_8x4_16bit_hi = _mm256_add_epi16( - sums_8x4_16bit_hi, - _mm256_cvtepi8_epi16(_mm256_extracti128_si256(r0, 1))); - sums_8x4_16bit_hi = _mm256_add_epi16( - sums_8x4_16bit_hi, - _mm256_cvtepi8_epi16(_mm256_extracti128_si256(r1, 1))); - sums_8x4_16bit_hi = _mm256_add_epi16( - 
sums_8x4_16bit_hi, - _mm256_cvtepi8_epi16(_mm256_extracti128_si256(r2, 1))); - sums_8x4_16bit_hi = _mm256_add_epi16( - sums_8x4_16bit_hi, - _mm256_cvtepi8_epi16(_mm256_extracti128_si256(r3, 1))); - sums_8x4_16bit_hi = _mm256_add_epi16( - sums_8x4_16bit_hi, - _mm256_cvtepi8_epi16(_mm256_extracti128_si256(r4, 1))); - sums_8x4_16bit_hi = _mm256_add_epi16( - sums_8x4_16bit_hi, - _mm256_cvtepi8_epi16(_mm256_extracti128_si256(r5, 1))); - sums_8x4_16bit_hi = _mm256_add_epi16( - sums_8x4_16bit_hi, - _mm256_cvtepi8_epi16(_mm256_extracti128_si256(r6, 1))); - sums_8x4_16bit_hi = _mm256_add_epi16( - sums_8x4_16bit_hi, - _mm256_cvtepi8_epi16(_mm256_extracti128_si256(r7, 1))); - - _mm256_storeu_si256(reinterpret_cast<__m256i*>(trailing_buf + 0 * 8 * 4), - r0); - _mm256_storeu_si256(reinterpret_cast<__m256i*>(trailing_buf + 2 * 8 * 4), - r4); - _mm256_storeu_si256(reinterpret_cast<__m256i*>(trailing_buf + 4 * 8 * 4), - r1); - _mm256_storeu_si256(reinterpret_cast<__m256i*>(trailing_buf + 6 * 8 * 4), - r5); - _mm256_storeu_si256(reinterpret_cast<__m256i*>(trailing_buf + 1 * 8 * 4), - r2); - _mm256_storeu_si256(reinterpret_cast<__m256i*>(trailing_buf + 3 * 8 * 4), - r6); - _mm256_storeu_si256(reinterpret_cast<__m256i*>(trailing_buf + 5 * 8 * 4), - r3); - _mm256_storeu_si256(reinterpret_cast<__m256i*>(trailing_buf + 7 * 8 * 4), - r7); + // i: chunks, j: Layout::kCols, s: Layout::Rows. 
+ if (sums_ptr) { + for (int i = 0; i < 8; ++i) { + for (int j = 0; j < 8; ++j) { + for (int s = 0; s < 4; ++s) { + trailing_buf[(8 * i + j) * 4 + s] = in_data[j][i][s] ^ input_xor; + sums_ptr[j] = sums_ptr[j] + (in_data[j][i][s] ^ input_xor); + } + } + } + } else { + for (int i = 0; i < 8; ++i) { + for (int j = 0; j < 8; ++j) { + for (int s = 0; s < 4; ++s) { + trailing_buf[(8 * i + j) * 4 + s] = in_data[j][i][s] ^ input_xor; + } + } + } + } } packed_ptr += 8 * kNumChunkedSrcRows; @@ -504,8 +434,6 @@ inline void Pack8bitAvx2Packer(const std::int8_t* src_ptr, } if (sums_ptr) { - const __m256i sums_adjustment_v = _mm256_set1_epi32(sums_adjustment); - __m256i sums = _mm256_loadu_si256(reinterpret_cast(sums_ptr)); const __m256i ones_16bit = _mm256_set1_epi16(1); @@ -527,7 +455,6 @@ inline void Pack8bitAvx2Packer(const std::int8_t* src_ptr, _mm256_permute2x128_si256(sums_2x8_32bit_lo, sums_2x8_32bit_hi, 0x20); const __m256i sums_2x8_32bit_b = _mm256_permute2x128_si256(sums_2x8_32bit_lo, sums_2x8_32bit_hi, 0x31); - sums = _mm256_add_epi32(sums, sums_adjustment_v); sums = _mm256_add_epi32(sums, sums_2x8_32bit_a); sums = _mm256_add_epi32(sums, sums_2x8_32bit_b); From 3f150ee789c90524d813cfdfb2906b265c2cb0fb Mon Sep 17 00:00:00 2001 From: TensorFlow Release Automation Date: Wed, 13 Nov 2019 13:05:00 -0800 Subject: [PATCH 011/130] Insert release notes place-fill --- RELEASE.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/RELEASE.md b/RELEASE.md index c415315f88270d..52fae8f489cd45 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -1,3 +1,7 @@ +# Release 2.1.0 + + + # Release 1.15.0 This is the last 1.x release for TensorFlow. We do not expect to update the 1.x branch with features, although we will issue patch releases to fix vulnerabilities for at least one year. 
From 0738369be5eebfcce21391ccb8bb09a2f4750e2f Mon Sep 17 00:00:00 2001 From: TensorFlow Release Automation Date: Wed, 13 Nov 2019 14:33:08 -0800 Subject: [PATCH 012/130] Update version numbers to 2.1.0-rc0 --- tensorflow/core/public/version.h | 4 ++-- tensorflow/tensorflow.bzl | 2 +- tensorflow/tools/pip_package/setup.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h index 4c35788e5dee7c..04f45726cbf1f8 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -21,12 +21,12 @@ limitations under the License. // Also update tensorflow/tensorflow.bzl and // tensorflow/tools/pip_package/setup.py #define TF_MAJOR_VERSION 2 -#define TF_MINOR_VERSION 0 +#define TF_MINOR_VERSION 1 #define TF_PATCH_VERSION 0 // TF_VERSION_SUFFIX is non-empty for pre-releases (e.g. "-alpha", "-alpha.1", // "-beta", "-rc", "-rc.1") -#define TF_VERSION_SUFFIX "" +#define TF_VERSION_SUFFIX "-rc0" #define TF_STR_HELPER(x) #x #define TF_STR(x) TF_STR_HELPER(x) diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index 5971d41525fb99..57b1382c231d5a 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -54,7 +54,7 @@ def register_extension_info(**kwargs): # not contain rc or alpha, only numbers. # Also update tensorflow/core/public/version.h # and tensorflow/tools/pip_package/setup.py -VERSION = "2.0.0" +VERSION = "2.1.0" VERSION_MAJOR = VERSION.split(".")[0] def if_v2(a): diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py index 58dca6fccff790..663afadaa7cb70 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -47,7 +47,7 @@ # result for pip. 
# Also update tensorflow/tensorflow.bzl and # tensorflow/core/public/version.h -_VERSION = '2.0.0' +_VERSION = '2.1.0-rc0' REQUIRED_PACKAGES = [ 'absl-py >= 0.7.0', From 0bd44b720ee9677aa3bec82b5084310fa6b2b79e Mon Sep 17 00:00:00 2001 From: Karim Nosir Date: Fri, 8 Nov 2019 10:38:34 -0800 Subject: [PATCH 013/130] Label libs with alwayslink PiperOrigin-RevId: 279344508 Change-Id: Id43dcf4dafc8d45eb4eacd5f2730a6fad0ddf133 --- tensorflow/lite/BUILD | 1 + tensorflow/lite/experimental/c/BUILD | 2 ++ tensorflow/lite/kernels/BUILD | 1 + 3 files changed, 4 insertions(+) diff --git a/tensorflow/lite/BUILD b/tensorflow/lite/BUILD index 08b44eb42c641d..05c83b3001e544 100644 --- a/tensorflow/lite/BUILD +++ b/tensorflow/lite/BUILD @@ -218,6 +218,7 @@ cc_library( "//tensorflow/lite/nnapi:nnapi_implementation", "//tensorflow/lite/schema:schema_fbs", ], + alwayslink = 1, ) cc_library( diff --git a/tensorflow/lite/experimental/c/BUILD b/tensorflow/lite/experimental/c/BUILD index 23b64f72d16f61..c9da03a070a88b 100644 --- a/tensorflow/lite/experimental/c/BUILD +++ b/tensorflow/lite/experimental/c/BUILD @@ -73,6 +73,7 @@ cc_library( "//tensorflow/lite/c:c_api_internal", "//tensorflow/lite/kernels:builtin_ops", ], + alwayslink = 1, ) cc_library( @@ -85,6 +86,7 @@ cc_library( ":c_api_internal", "//tensorflow/lite:kernel_api", ], + alwayslink = 1, ) cc_test( diff --git a/tensorflow/lite/kernels/BUILD b/tensorflow/lite/kernels/BUILD index c137888214d835..65f7c858a7a5d6 100644 --- a/tensorflow/lite/kernels/BUILD +++ b/tensorflow/lite/kernels/BUILD @@ -542,6 +542,7 @@ cc_library( "@farmhash_archive//:farmhash", "@flatbuffers", ], + alwayslink = 1, ) cc_library( From 422496e55bc5b0229335ad91433a4b1387c5295d Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Mon, 11 Nov 2019 12:33:13 -0800 Subject: [PATCH 014/130] Add more `alwayslink=1` to cc_library as a follow up on flipping the default of --incompatible_remove_legacy_whole_archive in cl/277339372 This should fix the latest issue reported in https://github.com/tensorflow/tensorflow/pull/33415. Also fixes an internally reported missing symbol. Related to https://github.com/bazelbuild/bazel/issues/7362 PiperOrigin-RevId: 279792794 Change-Id: I6f5d26ee37b9c886662df5e2daf9273c15cae865 --- tensorflow/lite/kernels/BUILD | 1 + tensorflow/tensorflow.bzl | 1 + 2 files changed, 2 insertions(+) diff --git a/tensorflow/lite/kernels/BUILD b/tensorflow/lite/kernels/BUILD index 65f7c858a7a5d6..f717c785dc4e59 100644 --- a/tensorflow/lite/kernels/BUILD +++ b/tensorflow/lite/kernels/BUILD @@ -623,6 +623,7 @@ cc_library( "//tensorflow/lite:framework", "//tensorflow/lite/c:c_api_internal", ], + alwayslink = 1, ) # The builtin_ops target will resolve to optimized kernels when available. This diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index 5971d41525fb99..16ce3c23dd4e91 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -2331,6 +2331,7 @@ def tf_generate_proto_text_sources(name, srcs_relative_dir, srcs, protodeps = [] hdrs = out_hdrs, visibility = visibility, deps = deps, + alwayslink = 1, ) def tf_genrule_cmd_append_to_srcs(to_append): From f333bd6459440eb4c27c26d04e6732834b445536 Mon Sep 17 00:00:00 2001 From: Jiri Simsa Date: Wed, 13 Nov 2019 08:12:28 -0800 Subject: [PATCH 015/130] [tf.data] Improving detection of infinitely repeated datasets in the presence of errors. 
PiperOrigin-RevId: 280200721 Change-Id: Icfaffb567b970da140e9b0d3a6c2093452893f01 --- tensorflow/core/framework/dataset.cc | 1 + tensorflow/core/framework/dataset.h | 6 ++++- .../core/kernels/data/shuffle_dataset_op.cc | 18 +++++++++------ .../python/data/kernel_tests/shuffle_test.py | 23 +++++++++++++++++++ tensorflow/python/eager/context.py | 6 +---- 5 files changed, 41 insertions(+), 13 deletions(-) diff --git a/tensorflow/core/framework/dataset.cc b/tensorflow/core/framework/dataset.cc index fc6f8fdbb9012a..261d930269545b 100644 --- a/tensorflow/core/framework/dataset.cc +++ b/tensorflow/core/framework/dataset.cc @@ -403,6 +403,7 @@ Status DatasetBaseIterator::GetNext(IteratorContext* ctx, bool* end_of_sequence) { profiler::TraceMe activity([&] { return BuildTraceMeName(); }, profiler::TraceMeLevel::kInfo); + VLOG(3) << prefix() << " GetNext"; RecordStart(ctx, /*stop_output=*/true); Status s = GetNextInternal(ctx, out_tensors, end_of_sequence); if (s.ok() && !*end_of_sequence) RecordElement(ctx); diff --git a/tensorflow/core/framework/dataset.h b/tensorflow/core/framework/dataset.h index 5a1fe974094a8e..2e5b23ffa53d26 100644 --- a/tensorflow/core/framework/dataset.h +++ b/tensorflow/core/framework/dataset.h @@ -815,9 +815,13 @@ class DatasetBaseIterator : public IteratorBase { explicit DatasetBaseIterator(const BaseParams& params) : params_(params) { params_.dataset->Ref(); + VLOG(3) << prefix() << " constructor"; } - ~DatasetBaseIterator() override { params_.dataset->Unref(); } + ~DatasetBaseIterator() override { + VLOG(3) << prefix() << " destructor"; + params_.dataset->Unref(); + } const DataTypeVector& output_dtypes() const override { return params_.dataset->output_dtypes(); diff --git a/tensorflow/core/kernels/data/shuffle_dataset_op.cc b/tensorflow/core/kernels/data/shuffle_dataset_op.cc index 6f3b939bac5db4..674467abedfa5b 100644 --- a/tensorflow/core/kernels/data/shuffle_dataset_op.cc +++ b/tensorflow/core/kernels/data/shuffle_dataset_op.cc @@ -54,6 
+54,7 @@ const int64 kLogIntervalMicros = 10 * 1000000; // 10 seconds. const int64 kMaxEpochsInBuffer = 3; constexpr char kNumRandomSamples[] = "num_random_samples"; +constexpr char kDataProduced[] = "data_produced"; constexpr char kEndOfInputSequence[] = "end_of_input_sequence"; constexpr char kEpoch[] = "epoch"; constexpr char kNumElements[] = "num_elements"; @@ -138,9 +139,7 @@ class ShuffleDatasetOpBase::ShuffleDatasetBase : public DatasetBase { mutex_lock l(mu_); int64 start_micros = ctx->env()->NowMicros(); int64 num_log_entries = 0; - bool first_call = false; if (!input_impl_ && epoch_ == 0) { - first_call = true; TF_RETURN_IF_ERROR(this->dataset()->input_->MakeIterator( ctx, this->prefix(), &input_impl_)); } @@ -158,13 +157,12 @@ class ShuffleDatasetOpBase::ShuffleDatasetBase : public DatasetBase { TF_RETURN_IF_ERROR(input_impl_->GetNext(ctx, &input_element, &end_of_input_sequence)); if (!end_of_input_sequence) { - first_call = false; + data_produced_ = true; break; } - if (first_call && this->dataset()->count_ == -1) { - // If the first call to GetNext() fails because the end - // of sequence has been reached, we terminate the - // iteration immediately. (Otherwise, this iterator + if (!data_produced_ && this->dataset()->count_ == -1) { + // If we encounter the end of sequence without producing data, we + // terminate the iteration immediately. (Otherwise, this iterator // would loop infinitely and never produce a value.) 
*end_of_sequence = true; return Status::OK(); @@ -289,6 +287,10 @@ class ShuffleDatasetOpBase::ShuffleDatasetBase : public DatasetBase { } } } + if (data_produced_) { + TF_RETURN_IF_ERROR( + writer->WriteScalar(this->full_name(kDataProduced), "")); + } return Status::OK(); } @@ -353,6 +355,7 @@ class ShuffleDatasetOpBase::ShuffleDatasetBase : public DatasetBase { } } } + data_produced_ = reader->Contains(this->full_name(kDataProduced)); return Status::OK(); } @@ -394,6 +397,7 @@ class ShuffleDatasetOpBase::ShuffleDatasetBase : public DatasetBase { random::SingleSampleAdapter generator_ GUARDED_BY(mu_); int64 num_random_samples_ GUARDED_BY(mu_) = 0; + bool data_produced_ GUARDED_BY(mu_) = false; }; const DatasetBase* const input_; diff --git a/tensorflow/python/data/kernel_tests/shuffle_test.py b/tensorflow/python/data/kernel_tests/shuffle_test.py index b2d2d23a8fa871..7f801e1b5f4dfc 100644 --- a/tensorflow/python/data/kernel_tests/shuffle_test.py +++ b/tensorflow/python/data/kernel_tests/shuffle_test.py @@ -32,6 +32,7 @@ from tensorflow.python.framework import ops from tensorflow.python.framework import random_seed from tensorflow.python.ops import array_ops +from tensorflow.python.ops import check_ops from tensorflow.python.ops import variables from tensorflow.python.platform import test @@ -308,6 +309,28 @@ def consume(): consume() self.assertAllEqual(self.evaluate(counter_var), 10) + @combinations.generate(test_base.default_test_combinations()) + def testEmptyDataset(self): + dataset = dataset_ops.Dataset.from_tensors(1) + + def map_fn(x): + with ops.control_dependencies([check_ops.assert_equal(x, 0)]): + return x + + dataset = dataset.map(map_fn) + dataset = dataset.cache() + dataset = dataset.shuffle(buffer_size=10).repeat() + + get_next = self.getNext(dataset) + + # First time around, we get an error for the failed assertion. 
+ with self.assertRaises(errors.InvalidArgumentError): + self.evaluate(get_next()) + + # Second time around, we get an EOF because the cached dataset is empty. + with self.assertRaises(errors.OutOfRangeError): + self.evaluate(get_next()) + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/eager/context.py b/tensorflow/python/eager/context.py index 8de73bc35d14b4..7dce93066ed0bc 100644 --- a/tensorflow/python/eager/context.py +++ b/tensorflow/python/eager/context.py @@ -65,11 +65,7 @@ MIRRORING_NONE = pywrap_tensorflow.TFE_MIRRORING_NONE MIRRORING_ALL = pywrap_tensorflow.TFE_MIRRORING_ALL -# TODO(b/143164764): Currently _KEEP_ALIVE_SECS is set to a very long time -# (i.e. 30 days) because the server may deadlock when destroying the eager -# context. This may cause memory leak in the headless TPU case, we should change -# it back to 600 once the deadlock is fixed. -_KEEP_ALIVE_SECS = 2592000 +_KEEP_ALIVE_SECS = 600 _python_eager_context_create_counter = monitoring.Counter( "/tensorflow/api/python/eager_context_create_counter", From a594b6b38a08d85e5b0b7db472799ab85fa99be8 Mon Sep 17 00:00:00 2001 From: Jiri Simsa Date: Thu, 14 Nov 2019 07:44:18 -0800 Subject: [PATCH 016/130] [tf.data] Improvements to cancellation logic. 
This CL: - makes sure that a previously cancelled input pipeline won't hang in ParallelMapDataset kernel - excludes errors::Cancelled() from being ignored by the IgnoreErrorsDataset kernel to avoid infinitely looping after a cancellation PiperOrigin-RevId: 280425330 Change-Id: If210c94c59e3b33b170e77aea6c87308241364e6 --- tensorflow/core/framework/dataset.cc | 3 ++- tensorflow/core/framework/dataset.h | 4 ++-- .../kernels/data/experimental/ignore_errors_dataset_op.cc | 7 ++++--- tensorflow/core/kernels/data/iterator_ops.h | 6 +++++- tensorflow/core/kernels/data/parallel_map_dataset_op.cc | 6 ++++++ tensorflow/core/kernels/data/prefetch_dataset_op.cc | 3 +-- 6 files changed, 20 insertions(+), 9 deletions(-) diff --git a/tensorflow/core/framework/dataset.cc b/tensorflow/core/framework/dataset.cc index 261d930269545b..f27fa75eb7db7f 100644 --- a/tensorflow/core/framework/dataset.cc +++ b/tensorflow/core/framework/dataset.cc @@ -403,7 +403,7 @@ Status DatasetBaseIterator::GetNext(IteratorContext* ctx, bool* end_of_sequence) { profiler::TraceMe activity([&] { return BuildTraceMeName(); }, profiler::TraceMeLevel::kInfo); - VLOG(3) << prefix() << " GetNext"; + DVLOG(3) << prefix() << " GetNext enter"; RecordStart(ctx, /*stop_output=*/true); Status s = GetNextInternal(ctx, out_tensors, end_of_sequence); if (s.ok() && !*end_of_sequence) RecordElement(ctx); @@ -416,6 +416,7 @@ Status DatasetBaseIterator::GetNext(IteratorContext* ctx, s.error_message()); LOG(ERROR) << s; } + DVLOG(3) << prefix() << " GetNext exit"; return s; } diff --git a/tensorflow/core/framework/dataset.h b/tensorflow/core/framework/dataset.h index 2e5b23ffa53d26..ecb85a85ac139d 100644 --- a/tensorflow/core/framework/dataset.h +++ b/tensorflow/core/framework/dataset.h @@ -815,11 +815,11 @@ class DatasetBaseIterator : public IteratorBase { explicit DatasetBaseIterator(const BaseParams& params) : params_(params) { params_.dataset->Ref(); - VLOG(3) << prefix() << " constructor"; + VLOG(2) << prefix() << " 
constructor"; } ~DatasetBaseIterator() override { - VLOG(3) << prefix() << " destructor"; + VLOG(2) << prefix() << " destructor"; params_.dataset->Unref(); } diff --git a/tensorflow/core/kernels/data/experimental/ignore_errors_dataset_op.cc b/tensorflow/core/kernels/data/experimental/ignore_errors_dataset_op.cc index b9fb85ce7bf772..1dffff217a0ff6 100644 --- a/tensorflow/core/kernels/data/experimental/ignore_errors_dataset_op.cc +++ b/tensorflow/core/kernels/data/experimental/ignore_errors_dataset_op.cc @@ -87,14 +87,15 @@ class IgnoreErrorsDatasetOp : public UnaryDatasetOpKernel { Status GetNextInternal(IteratorContext* ctx, std::vector* out_tensors, bool* end_of_sequence) override { + Status s; { tf_shared_lock l(mu_); if (!input_impl_) { *end_of_sequence = true; return Status::OK(); } - Status s = input_impl_->GetNext(ctx, out_tensors, end_of_sequence); - while (!s.ok()) { + s = input_impl_->GetNext(ctx, out_tensors, end_of_sequence); + while (!s.ok() && !errors::IsCancelled(s)) { out_tensors->clear(); s = input_impl_->GetNext(ctx, out_tensors, end_of_sequence); } @@ -103,7 +104,7 @@ class IgnoreErrorsDatasetOp : public UnaryDatasetOpKernel { mutex_lock l(mu_); input_impl_.reset(); } - return Status::OK(); + return s; } protected: diff --git a/tensorflow/core/kernels/data/iterator_ops.h b/tensorflow/core/kernels/data/iterator_ops.h index a24132e97ccb86..4f90b79265d11f 100644 --- a/tensorflow/core/kernels/data/iterator_ops.h +++ b/tensorflow/core/kernels/data/iterator_ops.h @@ -43,7 +43,11 @@ class IteratorResource : public ResourceBase { iterator_state_(std::make_shared( std::move(flib_def), std::move(pflr), flr, /*iterator=*/nullptr)), output_dtypes_(output_dtypes), - output_shapes_(output_shapes) {} + output_shapes_(output_shapes) { + VLOG(2) << "constructor"; + } + + ~IteratorResource() override { VLOG(2) << "destructor"; } Status GetNext(OpKernelContext* ctx, std::vector* out_tensors, bool* end_of_sequence); diff --git 
a/tensorflow/core/kernels/data/parallel_map_dataset_op.cc b/tensorflow/core/kernels/data/parallel_map_dataset_op.cc index 2a2c33296a2486..1658f0a63f2925 100644 --- a/tensorflow/core/kernels/data/parallel_map_dataset_op.cc +++ b/tensorflow/core/kernels/data/parallel_map_dataset_op.cc @@ -318,6 +318,9 @@ class ParallelMapIterator : public DatasetBaseIterator { cond_var_->wait(l); RecordStart(ctx); } + if (cancelled_) { + return errors::Cancelled("Iterator was cancelled"); + } } RecordStop(ctx); result->notification.WaitForNotification(); @@ -555,6 +558,9 @@ class ParallelMapIterator : public DatasetBaseIterator { // false, `result` will point to the result. bool ShouldWait(std::shared_ptr* result) EXCLUSIVE_LOCKS_REQUIRED(*mu_) { + if (cancelled_) { + return false; + } if (sloppy_) { for (auto it = invocation_results_.begin(); it != invocation_results_.end(); ++it) { diff --git a/tensorflow/core/kernels/data/prefetch_dataset_op.cc b/tensorflow/core/kernels/data/prefetch_dataset_op.cc index 389b1cb856e738..618e2b17c94f4c 100644 --- a/tensorflow/core/kernels/data/prefetch_dataset_op.cc +++ b/tensorflow/core/kernels/data/prefetch_dataset_op.cc @@ -192,8 +192,7 @@ class PrefetchDatasetOp::Dataset : public DatasetBase { } if (cancelled_) { - return errors::Cancelled( - "PrefetchDatasetOp::Dataset::Iterator::GetNext"); + return errors::Cancelled("Iterator was cancelled"); } if (!buffer_.empty()) { From eb18ad3d01d8528caf73a0a4729fcad5e7ff6ddf Mon Sep 17 00:00:00 2001 From: Gaurav Jain Date: Wed, 13 Nov 2019 15:14:46 -0800 Subject: [PATCH 017/130] Support LogicalDevice in MirroredStrategy config PiperOrigin-RevId: 280290757 Change-Id: I52dfff634e6e0ccdc81cd5cce682d7df3499b618 --- tensorflow/python/distribute/device_util.py | 8 ++++++-- .../python/distribute/mirrored_strategy.py | 16 +++++++++++----- .../remote_mirrored_strategy_eager_test.py | 2 +- tensorflow/python/eager/context.py | 6 +++++- tensorflow/python/tpu/tpu_system_metadata.py | 18 +++++++----------- 5 files 
changed, 30 insertions(+), 20 deletions(-) diff --git a/tensorflow/python/distribute/device_util.py b/tensorflow/python/distribute/device_util.py index d1295f27019d33..db6009d1a45e7f 100644 --- a/tensorflow/python/distribute/device_util.py +++ b/tensorflow/python/distribute/device_util.py @@ -38,13 +38,17 @@ def canonicalize(d, default=None): Note: This uses "job:localhost" as the default if executing eagerly. Args: - d: a device string. + d: a device string or tf.config.LogicalDevice default: a string for default device if d doesn't have all components. Returns: a canonicalized device string. """ - d = tf_device.DeviceSpec.from_string(d) + if isinstance(d, context.LogicalDevice): + d = tf_device.DeviceSpec.from_string(d.name) + else: + d = tf_device.DeviceSpec.from_string(d) + assert d.device_type is None or d.device_type == d.device_type.upper(), ( "Device type '%s' must be all-caps." % (d.device_type,)) # Fill in missing device fields using defaults. diff --git a/tensorflow/python/distribute/mirrored_strategy.py b/tensorflow/python/distribute/mirrored_strategy.py index baae903c14da9d..58c155a543424c 100644 --- a/tensorflow/python/distribute/mirrored_strategy.py +++ b/tensorflow/python/distribute/mirrored_strategy.py @@ -38,6 +38,7 @@ from tensorflow.python.eager import context from tensorflow.python.eager import def_function from tensorflow.python.eager import tape +from tensorflow.python.framework import config from tensorflow.python.framework import constant_op from tensorflow.python.framework import device as tf_device from tensorflow.python.framework import dtypes @@ -206,7 +207,8 @@ def _is_device_list_single_worker(devices): """Checks whether the devices list is for single or multi-worker. Args: - devices: a list of device strings, either local or for remote devices. + devices: a list of device strings or tf.config.LogicalDevice objects, for + either local or for remote devices. 
Returns: a boolean indicating whether these device strings are for local or for @@ -215,7 +217,10 @@ def _is_device_list_single_worker(devices): Raises: ValueError: if device strings are not consistent. """ - specs = (tf_device.DeviceSpec.from_string(d) for d in devices) + specs = [] + for d in devices: + name = d.name if isinstance(d, context.LogicalDevice) else d + specs.append(tf_device.DeviceSpec.from_string(name)) num_workers = len({(d.job, d.task, d.replica) for d in specs}) all_local = all(d.job in (None, "localhost") for d in specs) any_local = any(d.job in (None, "localhost") for d in specs) @@ -321,9 +326,10 @@ def _infer_num_gpus_per_worker(devices): def all_local_devices(num_gpus=None): - if num_gpus is None: - num_gpus = context.num_gpus() - return device_util.local_devices_from_num_gpus(num_gpus) + devices = config.list_logical_devices("GPU") + if num_gpus is not None: + devices = devices[:num_gpus] + return devices or config.list_logical_devices("CPU") def all_devices(): diff --git a/tensorflow/python/distribute/remote_mirrored_strategy_eager_test.py b/tensorflow/python/distribute/remote_mirrored_strategy_eager_test.py index 1389ec393e026b..36ec919a57566b 100644 --- a/tensorflow/python/distribute/remote_mirrored_strategy_eager_test.py +++ b/tensorflow/python/distribute/remote_mirrored_strategy_eager_test.py @@ -30,7 +30,7 @@ def get_gpus(): gpus = context.context().list_logical_devices("GPU") actual_gpus = [] for gpu in gpus: - if "localhost" not in gpu.name: + if "job" in gpu.name: actual_gpus.append(gpu.name) return actual_gpus diff --git a/tensorflow/python/eager/context.py b/tensorflow/python/eager/context.py index 7dce93066ed0bc..f2ab167e24c0fa 100644 --- a/tensorflow/python/eager/context.py +++ b/tensorflow/python/eager/context.py @@ -470,8 +470,12 @@ def _initialize_logical_devices(self): dev_name = pywrap_tensorflow.TF_DeviceListName(device_list, i) self._context_devices.append(pydev.canonical_name(dev_name)) spec = 
pydev.DeviceSpec.from_string(dev_name) + # If the job is localhost, we assume that the cluster has not yet been + # configured and thus clear the job, replica & task. + if spec.job == "localhost": + spec = spec.replace(job=None, replica=None, task=None) self._logical_devices.append( - LogicalDevice(name=dev_name, device_type=spec.device_type)) + LogicalDevice(name=spec.to_string(), device_type=spec.device_type)) dev_type = pywrap_tensorflow.TF_DeviceListType(device_list, i) if dev_type == "GPU": self._num_gpus += 1 diff --git a/tensorflow/python/tpu/tpu_system_metadata.py b/tensorflow/python/tpu/tpu_system_metadata.py index 8628feee418479..1998e0e0aeb859 100644 --- a/tensorflow/python/tpu/tpu_system_metadata.py +++ b/tensorflow/python/tpu/tpu_system_metadata.py @@ -19,10 +19,10 @@ from __future__ import print_function import collections -import re from tensorflow.core.protobuf import config_pb2 from tensorflow.python.client import session as session_lib +from tensorflow.python.distribute import device_util from tensorflow.python.eager import context from tensorflow.python.framework import config from tensorflow.python.framework import device as tf_device @@ -35,8 +35,6 @@ _RETRY_TIMES = 12 * 24 # 1 day _INITIAL_TPU_SYSTEM_TIMEOUT_IN_MS = 300 * 1000 # 5 mins -_TPU_DEVICE_REG = re.compile(r'.*task:(\d+)/.*device:TPU:(\d+)$') - _DEFAULT_JOB_NAME = 'tpu_worker' _DEFAULT_COORDINATOR_JOB_NAME = 'coordinator' _LOCAL_MASTERS = ('', 'local') @@ -61,11 +59,11 @@ def _query_tpu_system_metadata(master_address, cluster_def=None, if context.executing_eagerly(): logical_devices = config.list_logical_devices() - devices = [] # We want the output type to match in both eager and session mode - for d in logical_devices: - devices.append(session_lib._DeviceAttributes(d.name, d.device_type, 0, 0)) # pylint: disable=protected-access + devices = [session_lib._DeviceAttributes(device_util.canonicalize(d.name), # pylint: disable=protected-access + d.device_type, 0, 0) + for d in 
logical_devices] else: # TODO(b/120564445): Replace with standard library for retries. retry_count = 1 @@ -97,11 +95,9 @@ def _query_tpu_system_metadata(master_address, cluster_def=None, raise ValueError(msg) for device in devices: - match = _TPU_DEVICE_REG.match(device.name) - if match: - host_id = match.group(1) - core_id = match.group(2) - device_dict[host_id].append(core_id) + spec = tf_device.DeviceSpec.from_string(device.name) + if spec.device_type == 'TPU': + device_dict[spec.task].append(spec.device_index) tpu_core_count += 1 num_of_cores_per_host = 0 From bc7534848a9057fea91a32edd8421445eab8d904 Mon Sep 17 00:00:00 2001 From: geetachavan1 <53313357+geetachavan1@users.noreply.github.com> Date: Thu, 14 Nov 2019 15:56:14 -0800 Subject: [PATCH 018/130] Update BUILD --- tensorflow/lite/delegates/gpu/metal/BUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/lite/delegates/gpu/metal/BUILD b/tensorflow/lite/delegates/gpu/metal/BUILD index 3f4497748583f7..4bf443195df4bd 100644 --- a/tensorflow/lite/delegates/gpu/metal/BUILD +++ b/tensorflow/lite/delegates/gpu/metal/BUILD @@ -1,5 +1,5 @@ load("@build_bazel_rules_apple//apple:ios.bzl", "ios_application", "ios_unit_test") -load("//tensorflow/lite:special_rules.bzl", "tflite_portable_test_suite", "tflite_ios_per_kernel_test") +load("//tensorflow/lite:special_rules.bzl", "tflite_ios_per_kernel_test", "tflite_portable_test_suite") package( default_visibility = ["//visibility:public"], From e16b7cc06e6c2ef3c0a4c117dca1a0016d85194f Mon Sep 17 00:00:00 2001 From: Rohan Jain Date: Tue, 12 Nov 2019 15:41:26 -0800 Subject: [PATCH 019/130] OwnedIterators and MultiDeviceIterators get created using their components at least twice during function tracing. We don't want to run the deleter in those cases as they are just referring to the original resource and not creating a new one. So we just create the deleter the first time. 
PiperOrigin-RevId: 280071920 Change-Id: I266ac500246354e6e4d6145835c4f90926ec5715 --- tensorflow/python/data/ops/iterator_ops.py | 5 ----- .../python/data/ops/multi_device_iterator_ops.py | 12 ++++++------ 2 files changed, 6 insertions(+), 11 deletions(-) diff --git a/tensorflow/python/data/ops/iterator_ops.py b/tensorflow/python/data/ops/iterator_ops.py index f7afe995a07cf0..ff8d02f9d713b0 100644 --- a/tensorflow/python/data/ops/iterator_ops.py +++ b/tensorflow/python/data/ops/iterator_ops.py @@ -585,11 +585,6 @@ def __init__(self, dataset=None, components=None, element_spec=None): self._flat_output_shapes = structure.get_flat_tensor_shapes( self._element_spec) self._iterator_resource, self._deleter = components - # Delete the resource when this object is deleted - self._resource_deleter = IteratorResourceDeleter( - handle=self._iterator_resource, - device=self._device, - deleter=self._deleter) else: if (components is not None or element_spec is not None): raise ValueError(error_message) diff --git a/tensorflow/python/data/ops/multi_device_iterator_ops.py b/tensorflow/python/data/ops/multi_device_iterator_ops.py index 0eb3a95d5a1bbd..4ed39c2d2f64bf 100644 --- a/tensorflow/python/data/ops/multi_device_iterator_ops.py +++ b/tensorflow/python/data/ops/multi_device_iterator_ops.py @@ -425,7 +425,7 @@ def _serialize(self): def _component_specs(self): specs = [ tensor_spec.TensorSpec([], dtypes.resource), - tensor_spec.TensorSpec([], dtypes.scalar) + tensor_spec.TensorSpec([], dtypes.variant) ] for _ in range(len(self._devices)): specs.append(iterator_ops.IteratorSpec(self._element_spec)) @@ -565,11 +565,11 @@ def __init__(self, self._device_iterators.append(iterator) iterator_handles.append(iterator._iterator_resource) # pylint: disable=protected-access - self._resource_deleter = MultiDeviceIteratorResourceDeleter( - multi_device_iterator=self._multi_device_iterator_resource, - iterators=iterator_handles, - device=self._source_device, - deleter=self._deleter) + 
self._resource_deleter = MultiDeviceIteratorResourceDeleter( + multi_device_iterator=self._multi_device_iterator_resource, + iterators=iterator_handles, + device=self._source_device, + deleter=self._deleter) def get_next(self, device=None): """Returns the next element given a `device`, else returns all in a list.""" From a5c0ac62c7927774bccc71063769e9f37195bd5a Mon Sep 17 00:00:00 2001 From: geetachavan1 <53313357+geetachavan1@users.noreply.github.com> Date: Thu, 14 Nov 2019 16:04:17 -0800 Subject: [PATCH 020/130] Update BUILD --- tensorflow/lite/delegates/gpu/metal/kernels/BUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/BUILD b/tensorflow/lite/delegates/gpu/metal/kernels/BUILD index 2befc2d14e708b..84ea6cf2d8a7b2 100644 --- a/tensorflow/lite/delegates/gpu/metal/kernels/BUILD +++ b/tensorflow/lite/delegates/gpu/metal/kernels/BUILD @@ -1,5 +1,5 @@ load("@build_bazel_rules_apple//apple:ios.bzl", "ios_unit_test") -load("//tensorflow/lite:special_rules.bzl", "tflite_portable_test_suite", "tflite_ios_per_kernel_test") +load("//tensorflow/lite:special_rules.bzl", "tflite_ios_per_kernel_test", "tflite_portable_test_suite") package( default_visibility = ["//visibility:public"], From 30f6ae204afcc906c8ee1267d62dbca13be36590 Mon Sep 17 00:00:00 2001 From: geetachavan1 <53313357+geetachavan1@users.noreply.github.com> Date: Thu, 14 Nov 2019 16:06:48 -0800 Subject: [PATCH 021/130] Update BUILD --- tensorflow/compiler/jit/BUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/compiler/jit/BUILD b/tensorflow/compiler/jit/BUILD index 20da141aa20e49..7ebba30b46e518 100644 --- a/tensorflow/compiler/jit/BUILD +++ b/tensorflow/compiler/jit/BUILD @@ -1,4 +1,4 @@ -load("//tensorflow:tensorflow.bzl", "if_mlir", "tf_cc_test", "cc_header_only_library") +load("//tensorflow:tensorflow.bzl", "cc_header_only_library", "if_mlir", "tf_cc_test") load("//tensorflow/stream_executor:build_defs.bzl", 
"if_cuda_or_rocm") load("//tensorflow:tensorflow.bzl", "tf_custom_op_py_library", "tf_jit_compilation_passes_extra_deps") load("//tensorflow/core/platform:default/build_config.bzl", "tf_additional_all_protos", "tf_proto_library") From 9477d81f07c5f69cd4fe5e1ce2e9e1ddc516540d Mon Sep 17 00:00:00 2001 From: geetachavan1 <53313357+geetachavan1@users.noreply.github.com> Date: Thu, 14 Nov 2019 16:08:11 -0800 Subject: [PATCH 022/130] Update BUILD --- tensorflow/compiler/xla/BUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/BUILD b/tensorflow/compiler/xla/BUILD index 1723e60891d401..7315824964d866 100644 --- a/tensorflow/compiler/xla/BUILD +++ b/tensorflow/compiler/xla/BUILD @@ -1,4 +1,4 @@ -load("//tensorflow:tensorflow.bzl", "tf_cc_test", "cc_header_only_library") +load("//tensorflow:tensorflow.bzl", "cc_header_only_library", "tf_cc_test") load("//tensorflow/compiler/xla:xla.bzl", "xla_proto_library") load( "//tensorflow/core/platform:default/build_config.bzl", From 25fff852d0e04aafffbb07d589baf9f51c6fffbc Mon Sep 17 00:00:00 2001 From: geetachavan1 <53313357+geetachavan1@users.noreply.github.com> Date: Thu, 14 Nov 2019 16:09:35 -0800 Subject: [PATCH 023/130] Update BUILD --- tensorflow/c/kernels/BUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/c/kernels/BUILD b/tensorflow/c/kernels/BUILD index 7cac7d78235feb..770352c62c1585 100644 --- a/tensorflow/c/kernels/BUILD +++ b/tensorflow/c/kernels/BUILD @@ -1,8 +1,8 @@ load( "//tensorflow:tensorflow.bzl", "tf_cc_test", - "tf_kernel_library", "tf_gen_op_libs", + "tf_kernel_library", ) package( From be87e4561927193dcb642e9084ca60b7bb332b51 Mon Sep 17 00:00:00 2001 From: geetachavan1 <53313357+geetachavan1@users.noreply.github.com> Date: Thu, 14 Nov 2019 16:13:39 -0800 Subject: [PATCH 024/130] Update BUILD --- tensorflow/core/debug/BUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/debug/BUILD 
b/tensorflow/core/debug/BUILD index ccdf478fabfb59..10e7c17abdc062 100644 --- a/tensorflow/core/debug/BUILD +++ b/tensorflow/core/debug/BUILD @@ -15,10 +15,10 @@ load( "//tensorflow:tensorflow.bzl", "check_deps", + "tf_cc_binary", "tf_cc_test", "tf_copts", "tf_cuda_library", - "tf_cc_binary", ) # For platform specific build config From e02eb83bf0529ad637fc7e73db56a00bb44a1ad9 Mon Sep 17 00:00:00 2001 From: geetachavan1 <53313357+geetachavan1@users.noreply.github.com> Date: Thu, 14 Nov 2019 16:14:58 -0800 Subject: [PATCH 025/130] Update BUILD --- tensorflow/core/profiler/internal/BUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/profiler/internal/BUILD b/tensorflow/core/profiler/internal/BUILD index b92d96e539e2c4..ac5c04938b0439 100644 --- a/tensorflow/core/profiler/internal/BUILD +++ b/tensorflow/core/profiler/internal/BUILD @@ -1,4 +1,4 @@ -load("//tensorflow:tensorflow.bzl", "tf_cc_test", "if_not_windows", "tf_cuda_library") +load("//tensorflow:tensorflow.bzl", "if_not_windows", "tf_cc_test", "tf_cuda_library") package( default_visibility = ["//tensorflow:internal"], From e7e8e546fa0f4394b4ecd5e4c41eb1cea5fcfcdf Mon Sep 17 00:00:00 2001 From: geetachavan1 <53313357+geetachavan1@users.noreply.github.com> Date: Thu, 14 Nov 2019 16:18:00 -0800 Subject: [PATCH 026/130] Update BUILD --- tensorflow/core/profiler/internal/gpu/BUILD | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/profiler/internal/gpu/BUILD b/tensorflow/core/profiler/internal/gpu/BUILD index 40a0a744b45628..84f7ec8804e593 100644 --- a/tensorflow/core/profiler/internal/gpu/BUILD +++ b/tensorflow/core/profiler/internal/gpu/BUILD @@ -1,8 +1,8 @@ load( "//tensorflow:tensorflow.bzl", - "tf_copts", - "tf_cuda_library", "if_cuda_is_configured_compat", + "tf_copts", + "tf_cuda_library" ) load("//tensorflow:tensorflow.bzl", "tf_cc_test_gpu") load( From b6c155fababadb50dd65063c81f3f003378f436c Mon Sep 17 00:00:00 2001 From: geetachavan1 
<53313357+geetachavan1@users.noreply.github.com> Date: Fri, 15 Nov 2019 08:43:33 -0800 Subject: [PATCH 027/130] Update BUILD --- tensorflow/core/profiler/internal/gpu/BUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/profiler/internal/gpu/BUILD b/tensorflow/core/profiler/internal/gpu/BUILD index 84f7ec8804e593..bae5c40ece65d9 100644 --- a/tensorflow/core/profiler/internal/gpu/BUILD +++ b/tensorflow/core/profiler/internal/gpu/BUILD @@ -2,7 +2,7 @@ load( "//tensorflow:tensorflow.bzl", "if_cuda_is_configured_compat", "tf_copts", - "tf_cuda_library" + "tf_cuda_library," ) load("//tensorflow:tensorflow.bzl", "tf_cc_test_gpu") load( From a62523dec20a989ef26f7d41d31ce1a027c13450 Mon Sep 17 00:00:00 2001 From: geetachavan1 <53313357+geetachavan1@users.noreply.github.com> Date: Fri, 15 Nov 2019 09:05:56 -0800 Subject: [PATCH 028/130] Update BUILD --- tensorflow/core/profiler/internal/gpu/BUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/profiler/internal/gpu/BUILD b/tensorflow/core/profiler/internal/gpu/BUILD index bae5c40ece65d9..2067acc9a3497b 100644 --- a/tensorflow/core/profiler/internal/gpu/BUILD +++ b/tensorflow/core/profiler/internal/gpu/BUILD @@ -2,7 +2,7 @@ load( "//tensorflow:tensorflow.bzl", "if_cuda_is_configured_compat", "tf_copts", - "tf_cuda_library," + "tf_cuda_library", ) load("//tensorflow:tensorflow.bzl", "tf_cc_test_gpu") load( From da0b330c56e0dc061c98d1155afbcb4cf7c345e5 Mon Sep 17 00:00:00 2001 From: Scott Zhu Date: Fri, 15 Nov 2019 13:04:10 -0800 Subject: [PATCH 029/130] Fix the performance regression for model.predict for non-tpu strategy. 
PiperOrigin-RevId: 280718986 Change-Id: I8eed8854a58ec3adfaccd7d96067d248ee2696c4 --- tensorflow/python/keras/engine/training_v2.py | 2 +- .../python/keras/engine/training_v2_utils.py | 58 +++++++++++---- .../keras/engine/training_v2_utils_test.py | 71 ++++++++++++------- 3 files changed, 91 insertions(+), 40 deletions(-) diff --git a/tensorflow/python/keras/engine/training_v2.py b/tensorflow/python/keras/engine/training_v2.py index 3025d186668ce3..476da84bcf7be5 100644 --- a/tensorflow/python/keras/engine/training_v2.py +++ b/tensorflow/python/keras/engine/training_v2.py @@ -436,7 +436,7 @@ def _model_iteration( # tf.print('{} on {} steps.'.format(ModeKeys.TRAIN, steps_per_epoch)) training_context = TrainingContext() - if mode == ModeKeys.PREDICT: + if training_v2_utils._should_add_batch_index_to_element(strategy, mode): dataset = training_v2_utils._add_batch_index_to_element(dataset) dataset = strategy.experimental_distribute_dataset(dataset) diff --git a/tensorflow/python/keras/engine/training_v2_utils.py b/tensorflow/python/keras/engine/training_v2_utils.py index 92db3adc8234fa..596792e5fe571d 100644 --- a/tensorflow/python/keras/engine/training_v2_utils.py +++ b/tensorflow/python/keras/engine/training_v2_utils.py @@ -75,12 +75,12 @@ def _make_execution_function(model, mode): def distributed_function(input_iterator): """A single step of the distributed execution across replicas.""" - args = _prepare_feed_values(model, input_iterator, mode) # Call `Model.{train,test,predict}_on_batch` on every replica passing # PerReplicas as arguments. On every replica inside this call, each # PerReplica object will return the value for that replica. The outputs # are PerReplicas too. strategy = distribution_strategy_context.get_strategy() + args = _prepare_feed_values(model, input_iterator, mode, strategy) outputs = strategy.experimental_run_v2( per_replica_function, args=args) # Out of PerReplica outputs reduce or pick values to return. 
@@ -129,7 +129,7 @@ def _non_none_constant_value(v): return constant_value if constant_value is not None else v -def _prepare_feed_values(model, inputs, mode): +def _prepare_feed_values(model, inputs, mode, strategy): """Prepare feed values to the model execution function. Arguments: @@ -138,6 +138,7 @@ def _prepare_feed_values(model, inputs, mode): model inputs may be lists, single values, or dicts mapping input feed names to values. mode: One of ModeKeys.TRAIN/ModeKeys.TEST/ModeKeys.PREDICT. + strategy: The current distribution strategy for the model. Returns: Feed values for the model in the given mode. This is a tuple of @@ -146,7 +147,7 @@ def _prepare_feed_values(model, inputs, mode): for inputs will always be wrapped in lists. """ # For predict, we need to extract the manually added batch_index first. - with_batch_index = mode == ModeKeys.PREDICT + with_batch_index = _should_add_batch_index_to_element(strategy, mode) inputs, targets, sample_weights, batch_index = _get_input_from_iterator( inputs, with_batch_index) @@ -207,7 +208,10 @@ def _predict_on_batch(x, y=None, sample_weights=None, batch_index=None): del y, sample_weights # Note that the x and batch_index is already per-replica value. result = predict_on_batch(model, x) - return (batch_index, result) + if batch_index is None: + return result + else: + return batch_index, result func = _predict_on_batch @@ -227,6 +231,9 @@ def _aggregate_predict_results(strategy, batch_outs, model): if not isinstance(batch_outs, list): batch_outs = [batch_outs] + with_batch_index = _should_add_batch_index_to_element( + strategy, ModeKeys.PREDICT) + # batch_outs is in following structure: # [ # replica_1_batch_index, replica_2_batch_index, ...., replica_x_batch_index, @@ -234,14 +241,19 @@ def _aggregate_predict_results(strategy, batch_outs, model): # ...... 
# replica_1_output_y, replica_2_output_y, ...., replica_x_output_y, # ] - batch_index, batch_outs = batch_outs[:num_replicas], batch_outs[num_replicas:] - batch_index = dist_utils.concat_along_batch_dimension(batch_index) - # Reorder the batch_index for it to do proper gather. Eg, if the original - # index is [0, 2, 4, 6, 1, 3, 5, 7], then the index for gather should be - # [0, 4, 1, 5, 2, 6, 3, 7]. - batch_index = np.argsort(batch_index) - # Only need to gather if the batch index is not sorted. - need_batch_index_gather = np.any(np.diff(batch_index) < 0) + # The replica_x_batch_index is optional and depends on the strategy type. + if with_batch_index: + batch_index, batch_outs = (batch_outs[:num_replicas], + batch_outs[num_replicas:]) + batch_index = dist_utils.concat_along_batch_dimension(batch_index) + # Reorder the batch_index for it to do proper gather. Eg, if the original + # index is [0, 2, 4, 6, 1, 3, 5, 7], then the index for gather should be + # [0, 4, 1, 5, 2, 6, 3, 7]. + batch_index = np.argsort(batch_index) + # Only need to gather if the batch index is not sorted. + need_batch_index_gather = np.any(np.diff(batch_index) < 0) + else: + need_batch_index_gather = False total_batch_outs = [] for i in range(num_outputs): @@ -318,6 +330,28 @@ def _add_batch_index_to_element(dataset): return dataset.map(lambda *inp: (math_ops.range(_get_batch_size(inp)), inp)) +def _should_add_batch_index_to_element(strategy, mode): + """Whether or not the batch index should be added to the input dataset. + + See docstring of _add_batch_index_to_element() for more details. So far the + batch index is only needed when using TPUStrategy with a multi-worker setting. + We will try to avoid adding batch index for other cases since it has a + performance implication. + + Args: + strategy: the current distribution strategy for the model. + mode: the current mode (Training/Eval/Predict) for the model. 
+ Returns: + Boolean, whether the batch index should be added for the input data to + preserve the ordering. + """ + # TODO(priyag, rxsang): Come up a better way to determine when the batch index + # should be added. + return (mode == ModeKeys.PREDICT + and dist_utils.is_tpu_strategy(strategy) + and strategy.num_replicas_in_sync > 1) + + def train_on_batch( model, x, diff --git a/tensorflow/python/keras/engine/training_v2_utils_test.py b/tensorflow/python/keras/engine/training_v2_utils_test.py index 84f90fe9a820b5..4499ad3c8c65f7 100644 --- a/tensorflow/python/keras/engine/training_v2_utils_test.py +++ b/tensorflow/python/keras/engine/training_v2_utils_test.py @@ -21,6 +21,7 @@ import collections from absl.testing import parameterized +import mock import numpy as np @@ -81,17 +82,20 @@ def dense_map_fn(i): start = 0 for batch in distributed_data: - batch_result = self.predict_loop(batch) - final_result = training_v2_utils._aggregate_predict_results( - self.strategy, batch_result, self.mock_model) - - # Make sure the dense result is in a sorted order. - expected_result = np.arange( - start=start, stop=start+self.batch_size).reshape((-1, 1)) - expected_result = np.tile(expected_result, 6).reshape( - (-1,) + self.dense_shape) - self.assertAllClose(final_result[0], expected_result) - start += self.batch_size + with mock.patch.object(training_v2_utils, + '_should_add_batch_index_to_element', + fake_should_add_batch_index_to_element): + batch_result = self.predict_loop(batch) + final_result = training_v2_utils._aggregate_predict_results( + self.strategy, batch_result, self.mock_model) + + # Make sure the dense result is in a sorted order. 
+ expected_result = np.arange( + start=start, stop=start+self.batch_size).reshape((-1, 1)) + expected_result = np.tile(expected_result, 6).reshape( + (-1,) + self.dense_shape) + self.assertAllClose(final_result[0], expected_result) + start += self.batch_size @combinations.generate(combinations.combine(tf_api_version=[1, 2], mode='eager')) @@ -108,14 +112,17 @@ def sparse_map_fn(i): start = 0 for batch in distributed_data: - batch_result = self.predict_loop(batch) - final_result = training_v2_utils._aggregate_predict_results( - self.strategy, batch_result, self.mock_model) - - # Make sure the dense result is in a sorted order. - expected_values = np.arange(start=start, stop=start+self.batch_size) - self.assertAllClose(final_result[0].values, expected_values) - start += self.batch_size + with mock.patch.object(training_v2_utils, + '_should_add_batch_index_to_element', + fake_should_add_batch_index_to_element): + batch_result = self.predict_loop(batch) + final_result = training_v2_utils._aggregate_predict_results( + self.strategy, batch_result, self.mock_model) + + # Make sure the dense result is in a sorted order. + expected_values = np.arange(start=start, stop=start+self.batch_size) + self.assertAllClose(final_result[0].values, expected_values) + start += self.batch_size @combinations.generate(combinations.combine(tf_api_version=[1, 2], mode='eager')) @@ -129,14 +136,24 @@ def ragged_map_fn(i): start = 0 for batch in distributed_data: - batch_result = self.predict_loop(batch) - final_result = training_v2_utils._aggregate_predict_results( - self.strategy, batch_result, self.mock_model) - - # Make sure the dense result is in a sorted order. 
- expected_values = np.arange(start=start, stop=start+self.batch_size) - self.assertAllClose(final_result[0].flat_values, expected_values) - start += self.batch_size + with mock.patch.object(training_v2_utils, + '_should_add_batch_index_to_element', + fake_should_add_batch_index_to_element): + batch_result = self.predict_loop(batch) + final_result = training_v2_utils._aggregate_predict_results( + self.strategy, batch_result, self.mock_model) + + # Make sure the dense result is in a sorted order. + expected_values = np.arange(start=start, stop=start+self.batch_size) + self.assertAllClose(final_result[0].flat_values, expected_values) + start += self.batch_size + + +def fake_should_add_batch_index_to_element(strategy, mode): + # Ignore the strategy instance check since we were using the MirroredStrategy + # for testing. + del strategy + return mode == ModeKeys.PREDICT if __name__ == '__main__': From ecd62bf7630dd9ff33f95187339c88d4ed8d69cc Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 13 Nov 2019 11:14:29 -0800 Subject: [PATCH 030/130] Update the TF CUDA version to 10.1 PiperOrigin-RevId: 280237880 Change-Id: Idb9ea64c73746e18b372f3c3a0e56b8b64d53b06 --- configure.py | 2 +- .../ci_build/release/ubuntu_16/gpu_py2_full/nightly_release.sh | 2 +- .../tools/ci_build/release/ubuntu_16/gpu_py2_full/nonpip.sh | 2 +- .../tools/ci_build/release/ubuntu_16/gpu_py2_full/nonpip_v1.sh | 2 +- tensorflow/tools/ci_build/release/ubuntu_16/gpu_py2_full/pip.sh | 2 +- .../tools/ci_build/release/ubuntu_16/gpu_py2_full/pip_v1.sh | 2 +- .../ci_build/release/ubuntu_16/gpu_py35_full/nightly_release.sh | 2 +- .../tools/ci_build/release/ubuntu_16/gpu_py35_full/nonpip.sh | 2 +- .../tools/ci_build/release/ubuntu_16/gpu_py35_full/nonpip_v1.sh | 2 +- .../tools/ci_build/release/ubuntu_16/gpu_py35_full/pip.sh | 2 +- .../tools/ci_build/release/ubuntu_16/gpu_py35_full/pip_v1.sh | 2 +- .../ci_build/release/ubuntu_16/gpu_py36_full/nightly_release.sh | 2 +- 
.../tools/ci_build/release/ubuntu_16/gpu_py36_full/nonpip.sh | 2 +- .../tools/ci_build/release/ubuntu_16/gpu_py36_full/nonpip_v1.sh | 2 +- .../tools/ci_build/release/ubuntu_16/gpu_py36_full/pip.sh | 2 +- .../tools/ci_build/release/ubuntu_16/gpu_py36_full/pip_v1.sh | 2 +- .../ci_build/release/ubuntu_16/gpu_py37_full/nightly_release.sh | 2 +- .../tools/ci_build/release/ubuntu_16/gpu_py37_full/nonpip.sh | 2 +- .../tools/ci_build/release/ubuntu_16/gpu_py37_full/nonpip_v1.sh | 2 +- .../tools/ci_build/release/ubuntu_16/gpu_py37_full/pip.sh | 2 +- .../tools/ci_build/release/ubuntu_16/gpu_py37_full/pip_v1.sh | 2 +- 21 files changed, 21 insertions(+), 21 deletions(-) diff --git a/configure.py b/configure.py index ff615a739acd6a..d29f3d4464979c 100644 --- a/configure.py +++ b/configure.py @@ -33,7 +33,7 @@ from distutils.spawn import find_executable as which # pylint: enable=g-import-not-at-top -_DEFAULT_CUDA_VERSION = '10' +_DEFAULT_CUDA_VERSION = '10.1' _DEFAULT_CUDNN_VERSION = '7' _DEFAULT_TENSORRT_VERSION = '6' _DEFAULT_CUDA_COMPUTE_CAPABILITIES = '3.5,7.0' diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py2_full/nightly_release.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py2_full/nightly_release.sh index 0786f157cf22e6..a2d67494dc60b8 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py2_full/nightly_release.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py2_full/nightly_release.sh @@ -31,7 +31,7 @@ export TF_NEED_GCP=1 export TF_NEED_HDFS=1 export TF_NEED_S3=1 export TF_NEED_CUDA=1 -export TF_CUDA_VERSION=10 +export TF_CUDA_VERSION=10.1 export TF_CUDNN_VERSION=7 export TF_CUDA_COMPUTE_CAPABILITIES=3.5,3.7,5.2,6.0,6.1,7.0 export TF_NEED_TENSORRT=1 diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py2_full/nonpip.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py2_full/nonpip.sh index f2e6a38abfc34d..ae6cb5aea81995 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py2_full/nonpip.sh +++ 
b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py2_full/nonpip.sh @@ -27,7 +27,7 @@ export TF_NEED_GCP=1 export TF_NEED_HDFS=1 export TF_NEED_S3=1 export TF_NEED_CUDA=1 -export TF_CUDA_VERSION=10 +export TF_CUDA_VERSION=10.1 export TF_CUDNN_VERSION=7 export TF_NEED_TENSORRT=1 export TENSORRT_INSTALL_PATH=/usr/local/tensorrt diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py2_full/nonpip_v1.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py2_full/nonpip_v1.sh index 645a7624593c29..be52c7ca37cf76 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py2_full/nonpip_v1.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py2_full/nonpip_v1.sh @@ -27,7 +27,7 @@ export TF_NEED_GCP=1 export TF_NEED_HDFS=1 export TF_NEED_S3=1 export TF_NEED_CUDA=1 -export TF_CUDA_VERSION=10 +export TF_CUDA_VERSION=10.1 export TF_CUDNN_VERSION=7 export TF_NEED_TENSORRT=1 export TENSORRT_INSTALL_PATH=/usr/local/tensorrt diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py2_full/pip.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py2_full/pip.sh index 1d0064b5e4eaba..c77db80087f833 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py2_full/pip.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py2_full/pip.sh @@ -32,7 +32,7 @@ export TF_NEED_GCP=1 export TF_NEED_HDFS=1 export TF_NEED_S3=1 export TF_NEED_CUDA=1 -export TF_CUDA_VERSION=10 +export TF_CUDA_VERSION=10.1 export TF_CUDNN_VERSION=7 export TF_NEED_TENSORRT=1 export TENSORRT_INSTALL_PATH=/usr/local/tensorrt diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py2_full/pip_v1.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py2_full/pip_v1.sh index 9218b90638dc9d..4959fcf8c5dba2 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py2_full/pip_v1.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py2_full/pip_v1.sh @@ -32,7 +32,7 @@ export TF_NEED_GCP=1 export TF_NEED_HDFS=1 export TF_NEED_S3=1 export TF_NEED_CUDA=1 -export TF_CUDA_VERSION=10 
+export TF_CUDA_VERSION=10.1 export TF_CUDNN_VERSION=7 export TF_NEED_TENSORRT=1 export TENSORRT_INSTALL_PATH=/usr/local/tensorrt diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py35_full/nightly_release.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py35_full/nightly_release.sh index ea9c9c259ce7fe..04024cb0ce8a3e 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py35_full/nightly_release.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py35_full/nightly_release.sh @@ -31,7 +31,7 @@ export TF_NEED_GCP=1 export TF_NEED_HDFS=1 export TF_NEED_S3=1 export TF_NEED_CUDA=1 -export TF_CUDA_VERSION=10 +export TF_CUDA_VERSION=10.1 export TF_CUDNN_VERSION=7 export TF_CUDA_COMPUTE_CAPABILITIES=3.5,3.7,5.2,6.0,6.1,7.0 export TF_NEED_TENSORRT=1 diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py35_full/nonpip.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py35_full/nonpip.sh index 609b06afbc33f2..13f6ce837a9717 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py35_full/nonpip.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py35_full/nonpip.sh @@ -27,7 +27,7 @@ export TF_NEED_GCP=1 export TF_NEED_HDFS=1 export TF_NEED_S3=1 export TF_NEED_CUDA=1 -export TF_CUDA_VERSION=10 +export TF_CUDA_VERSION=10.1 export TF_CUDNN_VERSION=7 export TF_NEED_TENSORRT=1 export TENSORRT_INSTALL_PATH=/usr/local/tensorrt diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py35_full/nonpip_v1.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py35_full/nonpip_v1.sh index e036175bf8f8f7..b45174a7b7fdb6 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py35_full/nonpip_v1.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py35_full/nonpip_v1.sh @@ -27,7 +27,7 @@ export TF_NEED_GCP=1 export TF_NEED_HDFS=1 export TF_NEED_S3=1 export TF_NEED_CUDA=1 -export TF_CUDA_VERSION=10 +export TF_CUDA_VERSION=10.1 export TF_CUDNN_VERSION=7 export TF_NEED_TENSORRT=1 export TENSORRT_INSTALL_PATH=/usr/local/tensorrt 
diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py35_full/pip.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py35_full/pip.sh index 53b0e6d8a1fc87..4fe4edb8d9cad9 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py35_full/pip.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py35_full/pip.sh @@ -32,7 +32,7 @@ export TF_NEED_GCP=1 export TF_NEED_HDFS=1 export TF_NEED_S3=1 export TF_NEED_CUDA=1 -export TF_CUDA_VERSION=10 +export TF_CUDA_VERSION=10.1 export TF_CUDNN_VERSION=7 export TF_NEED_TENSORRT=1 export TENSORRT_INSTALL_PATH=/usr/local/tensorrt diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py35_full/pip_v1.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py35_full/pip_v1.sh index efd52c53362393..a03388fd992284 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py35_full/pip_v1.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py35_full/pip_v1.sh @@ -32,7 +32,7 @@ export TF_NEED_GCP=1 export TF_NEED_HDFS=1 export TF_NEED_S3=1 export TF_NEED_CUDA=1 -export TF_CUDA_VERSION=10 +export TF_CUDA_VERSION=10.1 export TF_CUDNN_VERSION=7 export TF_NEED_TENSORRT=1 export TENSORRT_INSTALL_PATH=/usr/local/tensorrt diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py36_full/nightly_release.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py36_full/nightly_release.sh index dc4636f6576231..21ef6ec1a85877 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py36_full/nightly_release.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py36_full/nightly_release.sh @@ -31,7 +31,7 @@ export TF_NEED_GCP=1 export TF_NEED_HDFS=1 export TF_NEED_S3=1 export TF_NEED_CUDA=1 -export TF_CUDA_VERSION=10 +export TF_CUDA_VERSION=10.1 export TF_CUDNN_VERSION=7 export TF_CUDA_COMPUTE_CAPABILITIES=3.5,3.7,5.2,6.0,6.1,7.0 export TF_NEED_TENSORRT=1 diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py36_full/nonpip.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py36_full/nonpip.sh 
index 93d3fcfec359cc..38ce102e990e5b 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py36_full/nonpip.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py36_full/nonpip.sh @@ -27,7 +27,7 @@ export TF_NEED_GCP=1 export TF_NEED_HDFS=1 export TF_NEED_S3=1 export TF_NEED_CUDA=1 -export TF_CUDA_VERSION=10 +export TF_CUDA_VERSION=10.1 export TF_CUDNN_VERSION=7 export TF_NEED_TENSORRT=1 export TENSORRT_INSTALL_PATH=/usr/local/tensorrt diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py36_full/nonpip_v1.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py36_full/nonpip_v1.sh index ee041d306fb300..888d8106b97123 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py36_full/nonpip_v1.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py36_full/nonpip_v1.sh @@ -27,7 +27,7 @@ export TF_NEED_GCP=1 export TF_NEED_HDFS=1 export TF_NEED_S3=1 export TF_NEED_CUDA=1 -export TF_CUDA_VERSION=10 +export TF_CUDA_VERSION=10.1 export TF_CUDNN_VERSION=7 export TF_NEED_TENSORRT=1 export TENSORRT_INSTALL_PATH=/usr/local/tensorrt diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py36_full/pip.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py36_full/pip.sh index 2e23e6edd76e88..e24b9f5019f249 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py36_full/pip.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py36_full/pip.sh @@ -32,7 +32,7 @@ export TF_NEED_GCP=1 export TF_NEED_HDFS=1 export TF_NEED_S3=1 export TF_NEED_CUDA=1 -export TF_CUDA_VERSION=10 +export TF_CUDA_VERSION=10.1 export TF_CUDNN_VERSION=7 export TF_NEED_TENSORRT=1 export TENSORRT_INSTALL_PATH=/usr/local/tensorrt diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py36_full/pip_v1.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py36_full/pip_v1.sh index 5c96c542088a14..fde847237c0210 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py36_full/pip_v1.sh +++ 
b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py36_full/pip_v1.sh @@ -32,7 +32,7 @@ export TF_NEED_GCP=1 export TF_NEED_HDFS=1 export TF_NEED_S3=1 export TF_NEED_CUDA=1 -export TF_CUDA_VERSION=10 +export TF_CUDA_VERSION=10.1 export TF_CUDNN_VERSION=7 export TF_NEED_TENSORRT=1 export TENSORRT_INSTALL_PATH=/usr/local/tensorrt diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py37_full/nightly_release.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py37_full/nightly_release.sh index 09da2a756ff126..e44cfd0777da1a 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py37_full/nightly_release.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py37_full/nightly_release.sh @@ -31,7 +31,7 @@ export TF_NEED_GCP=1 export TF_NEED_HDFS=1 export TF_NEED_S3=1 export TF_NEED_CUDA=1 -export TF_CUDA_VERSION=10 +export TF_CUDA_VERSION=10.1 export TF_CUDNN_VERSION=7 export TF_CUDA_COMPUTE_CAPABILITIES=3.5,3.7,5.2,6.0,6.1,7.0 export TF_NEED_TENSORRT=1 diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py37_full/nonpip.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py37_full/nonpip.sh index a3985d24e3efd5..0a7bbb381378aa 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py37_full/nonpip.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py37_full/nonpip.sh @@ -27,7 +27,7 @@ export TF_NEED_GCP=1 export TF_NEED_HDFS=1 export TF_NEED_S3=1 export TF_NEED_CUDA=1 -export TF_CUDA_VERSION=10 +export TF_CUDA_VERSION=10.1 export TF_CUDNN_VERSION=7 export TF_NEED_TENSORRT=1 export TENSORRT_INSTALL_PATH=/usr/local/tensorrt diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py37_full/nonpip_v1.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py37_full/nonpip_v1.sh index c99e47e791b0e9..506aa3e857faaf 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py37_full/nonpip_v1.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py37_full/nonpip_v1.sh @@ -27,7 +27,7 @@ export TF_NEED_GCP=1 export TF_NEED_HDFS=1 
export TF_NEED_S3=1 export TF_NEED_CUDA=1 -export TF_CUDA_VERSION=10 +export TF_CUDA_VERSION=10.1 export TF_CUDNN_VERSION=7 export TF_NEED_TENSORRT=1 export TENSORRT_INSTALL_PATH=/usr/local/tensorrt diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py37_full/pip.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py37_full/pip.sh index da1830cdd72391..ff30c1e88af401 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py37_full/pip.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py37_full/pip.sh @@ -32,7 +32,7 @@ export TF_NEED_GCP=1 export TF_NEED_HDFS=1 export TF_NEED_S3=1 export TF_NEED_CUDA=1 -export TF_CUDA_VERSION=10 +export TF_CUDA_VERSION=10.1 export TF_CUDNN_VERSION=7 export TF_NEED_TENSORRT=1 export TENSORRT_INSTALL_PATH=/usr/local/tensorrt diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py37_full/pip_v1.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py37_full/pip_v1.sh index 7787c1ee519971..be85dbfc065c97 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py37_full/pip_v1.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py37_full/pip_v1.sh @@ -32,7 +32,7 @@ export TF_NEED_GCP=1 export TF_NEED_HDFS=1 export TF_NEED_S3=1 export TF_NEED_CUDA=1 -export TF_CUDA_VERSION=10 +export TF_CUDA_VERSION=10.1 export TF_CUDNN_VERSION=7 export TF_NEED_TENSORRT=1 export TENSORRT_INSTALL_PATH=/usr/local/tensorrt From 17c2ea1be6b0149fc7618250ce12d5ce7b02774e Mon Sep 17 00:00:00 2001 From: Geeta Chavan Date: Fri, 15 Nov 2019 16:09:06 -0800 Subject: [PATCH 031/130] Updating/setting xcode version to 10.3 PiperOrigin-RevId: 280756701 Change-Id: I3b4c8ce78ef5a68a0a23f32ac25caf527597c700 --- .../tools/ci_build/release/macos/cpu_py2_full/nonpip.sh | 3 ++- tensorflow/tools/ci_build/release/macos/cpu_py2_full/pip.sh | 4 ++++ .../tools/ci_build/release/macos/cpu_py35_full/nonpip.sh | 3 ++- tensorflow/tools/ci_build/release/macos/cpu_py35_full/pip.sh | 4 ++++ .../tools/ci_build/release/macos/cpu_py36_full/nonpip.sh | 3 
++- tensorflow/tools/ci_build/release/macos/cpu_py36_full/pip.sh | 4 ++++ .../tools/ci_build/release/macos/cpu_py37_full/nonpip.sh | 3 ++- tensorflow/tools/ci_build/release/macos/cpu_py37_full/pip.sh | 4 ++++ 8 files changed, 24 insertions(+), 4 deletions(-) diff --git a/tensorflow/tools/ci_build/release/macos/cpu_py2_full/nonpip.sh b/tensorflow/tools/ci_build/release/macos/cpu_py2_full/nonpip.sh index b1c27c2f7b3aa8..e3b74060823279 100644 --- a/tensorflow/tools/ci_build/release/macos/cpu_py2_full/nonpip.sh +++ b/tensorflow/tools/ci_build/release/macos/cpu_py2_full/nonpip.sh @@ -24,7 +24,8 @@ bazel version set_bazel_outdir # Pick a more recent version of xcode -sudo xcode-select --switch /Applications/Xcode_9.2.app/Contents/Developer +export DEVELOPER_DIR=/Applications/Xcode_10.3.app/Contents/Developer +sudo xcode-select -s "${DEVELOPER_DIR}" python -m virtualenv tf_build_env --system-site-packages source tf_build_env/bin/activate diff --git a/tensorflow/tools/ci_build/release/macos/cpu_py2_full/pip.sh b/tensorflow/tools/ci_build/release/macos/cpu_py2_full/pip.sh index 624690b4f6554a..98c241f0751ce4 100644 --- a/tensorflow/tools/ci_build/release/macos/cpu_py2_full/pip.sh +++ b/tensorflow/tools/ci_build/release/macos/cpu_py2_full/pip.sh @@ -23,6 +23,10 @@ which bazel bazel version set_bazel_outdir +# Pick a more recent version of xcode +export DEVELOPER_DIR=/Applications/Xcode_10.3.app/Contents/Developer +sudo xcode-select -s "${DEVELOPER_DIR}" + # Install macos pip dependencies install_macos_pip_deps sudo diff --git a/tensorflow/tools/ci_build/release/macos/cpu_py35_full/nonpip.sh b/tensorflow/tools/ci_build/release/macos/cpu_py35_full/nonpip.sh index 8142cdb2019699..d821656ba12efe 100644 --- a/tensorflow/tools/ci_build/release/macos/cpu_py35_full/nonpip.sh +++ b/tensorflow/tools/ci_build/release/macos/cpu_py35_full/nonpip.sh @@ -24,7 +24,8 @@ bazel version set_bazel_outdir # Pick a more recent version of xcode -sudo xcode-select --switch 
/Applications/Xcode_9.2.app/Contents/Developer +export DEVELOPER_DIR=/Applications/Xcode_10.3.app/Contents/Developer +sudo xcode-select -s "${DEVELOPER_DIR}" python3.5 -m virtualenv tf_build_env --system-site-packages source tf_build_env/bin/activate diff --git a/tensorflow/tools/ci_build/release/macos/cpu_py35_full/pip.sh b/tensorflow/tools/ci_build/release/macos/cpu_py35_full/pip.sh index 8e5ea5cee7faf5..a1cbfde33fe78a 100644 --- a/tensorflow/tools/ci_build/release/macos/cpu_py35_full/pip.sh +++ b/tensorflow/tools/ci_build/release/macos/cpu_py35_full/pip.sh @@ -23,6 +23,10 @@ which bazel bazel version set_bazel_outdir +# Pick a more recent version of xcode +export DEVELOPER_DIR=/Applications/Xcode_10.3.app/Contents/Developer +sudo xcode-select -s "${DEVELOPER_DIR}" + # Install macos pip dependencies install_macos_pip_deps sudo pip3.5 diff --git a/tensorflow/tools/ci_build/release/macos/cpu_py36_full/nonpip.sh b/tensorflow/tools/ci_build/release/macos/cpu_py36_full/nonpip.sh index dcc8147fbf882c..93205f8a60d458 100644 --- a/tensorflow/tools/ci_build/release/macos/cpu_py36_full/nonpip.sh +++ b/tensorflow/tools/ci_build/release/macos/cpu_py36_full/nonpip.sh @@ -24,7 +24,8 @@ bazel version set_bazel_outdir # Pick a more recent version of xcode -sudo xcode-select --switch /Applications/Xcode_9.2.app/Contents/Developer +export DEVELOPER_DIR=/Applications/Xcode_10.3.app/Contents/Developer +sudo xcode-select -s "${DEVELOPER_DIR}" python3.6 -m virtualenv tf_build_env --system-site-packages source tf_build_env/bin/activate diff --git a/tensorflow/tools/ci_build/release/macos/cpu_py36_full/pip.sh b/tensorflow/tools/ci_build/release/macos/cpu_py36_full/pip.sh index ed6eff65bf39db..d97fbf80f9417d 100644 --- a/tensorflow/tools/ci_build/release/macos/cpu_py36_full/pip.sh +++ b/tensorflow/tools/ci_build/release/macos/cpu_py36_full/pip.sh @@ -23,6 +23,10 @@ which bazel bazel version set_bazel_outdir +# Pick a more recent version of xcode +export 
DEVELOPER_DIR=/Applications/Xcode_10.3.app/Contents/Developer +sudo xcode-select -s "${DEVELOPER_DIR}" + # Install macos pip dependencies install_macos_pip_deps sudo pip3.6 diff --git a/tensorflow/tools/ci_build/release/macos/cpu_py37_full/nonpip.sh b/tensorflow/tools/ci_build/release/macos/cpu_py37_full/nonpip.sh index b0eff08a45da92..de34e7be8e33e1 100644 --- a/tensorflow/tools/ci_build/release/macos/cpu_py37_full/nonpip.sh +++ b/tensorflow/tools/ci_build/release/macos/cpu_py37_full/nonpip.sh @@ -24,7 +24,8 @@ bazel version set_bazel_outdir # Pick a more recent version of xcode -sudo xcode-select --switch /Applications/Xcode_9.2.app/Contents/Developer +export DEVELOPER_DIR=/Applications/Xcode_10.3.app/Contents/Developer +sudo xcode-select -s "${DEVELOPER_DIR}" python -m virtualenv tf_build_env --system-site-packages source tf_build_env/bin/activate diff --git a/tensorflow/tools/ci_build/release/macos/cpu_py37_full/pip.sh b/tensorflow/tools/ci_build/release/macos/cpu_py37_full/pip.sh index 1c1df96171f4b8..84f8b05818f4bd 100644 --- a/tensorflow/tools/ci_build/release/macos/cpu_py37_full/pip.sh +++ b/tensorflow/tools/ci_build/release/macos/cpu_py37_full/pip.sh @@ -23,6 +23,10 @@ which bazel bazel version set_bazel_outdir +# Pick a more recent version of xcode +export DEVELOPER_DIR=/Applications/Xcode_10.3.app/Contents/Developer +sudo xcode-select -s "${DEVELOPER_DIR}" + # Install macos pip dependencies install_macos_pip_deps sudo pip3.7 From eb7b125276ee8569cbb80b7fb3a7e75c49729677 Mon Sep 17 00:00:00 2001 From: sunway513 Date: Tue, 12 Nov 2019 02:00:24 -0800 Subject: [PATCH 032/130] Fix for runtime error of ROCm build See https://github.com/tensorflow/tensorflow/issues/34082#issuecomment-552493262 Close https://github.com/tensorflow/tensorflow/pull/34177 Fixes https://github.com/tensorflow/tensorflow/issues/34082 The preconfigured toolchain is updated by: tensorflow/third_party/toolchains/preconfig/generate/update.sh ubuntu16.04-py3_opt-gcc5-rocm PiperOrigin-RevId: 
279915064 Change-Id: I41396f8b47624ccc3ebdbb0889512a008c9b827a [ROCm] Fix for the broken ROCm CSB. The following commit breaks the --config=rocm build https://github.com/tensorflow/tensorflow/commit/bf9c196f37b9cbb3109b2891aaf9da85bf5f712a The above commit adds support for complex type in the optimizers. Complex types are not supported on the ROCm platform. Support for it needs to be excluded on the ROCm platform, and that is what this "fix" does. [ROCm] Fix for the broken ROCm CSB. The following commit breaks the --config=rocm build https://github.com/tensorflow/tensorflow/commit/f72695e1717a545bfc898b7230cc195bf28b43df The above commit adds a couple of subtests that require support for the `StatefulUnirformFullInt` Op on the GPU. Currently ROCm does not support that Op on the GPU, which leads to those subtests failing. The "fix" is to skip those subtests on the ROCm platform. [ROCm] Fix for the broken ROCm CSB. The following commit breaks the `--config=rocm` build https://github.com/tensorflow/tensorflow/commit/ab6524320e616774ce00e195b9cf0efbb991834e The commit above introduces the test "test_opt_einsum_cached" in //tensorflow/python:special_math_ops_test_gpu The order of execution of other tests within that file can dictate whether or not the newly added test will pass or fail. The failure (caught byt he ROCm Nighty CSB run) does not seem specific to the ROCm platform. The "fix" is to explicitly clear the lru_cache of the routine "special_math_ops._get_opt_einsum_contract_path" (before running the test) to gurantee that the test will pass, irrespective of the order in which it is run relative to the other tests. [ROCm] Fix for the broken ROCm CSB. 
The following commit breaks the `--config=rocm` build https://github.com/tensorflow/tensorflow/commit/c8b0100b9a297eac7725205e9f803cdbbb36809a The commit above introduces the test "testFusedBatchNormGradsInference" in //tensorflow/python/eager:forwardprop_test_gpu We are still working towards analysing the cause of the failure and potentially coming up with the fix. In meantime, the change in this commit is to skip the failing subtest on the ROCm platform. This is so that we can get he ROCm Nightly CSB build passing again. [ROCm] Update ROCm CI builds to use ROCm 2.8 This PR/commit updates the Dockerfile.rocm file to use ROCm version 2.8 (from the current 2.6). Switching to ROCm version 2.8, also adds to the requirement of specifying a couple of extra option to the `docker run` command. That change is also a part of this PR/commit. --- tensorflow/core/kernels/training_ops.cc | 72 ++++++++++++++----- .../core/kernels/training_ops_gpu.cu.cc | 40 ++++++++--- tensorflow/python/eager/forwardprop_test.py | 7 ++ .../keras/layers/image_preprocessing_test.py | 10 +++ .../keras/optimizer_v2/adadelta_test.py | 3 +- .../python/keras/optimizer_v2/adagrad_test.py | 3 +- .../python/keras/optimizer_v2/rmsprop_test.py | 3 +- .../python/ops/special_math_ops_test.py | 10 +++ tensorflow/tools/ci_build/Dockerfile.rocm | 2 +- .../tools/ci_build/builds/docker_test.sh | 3 +- tensorflow/tools/ci_build/ci_build.sh | 3 +- third_party/gpus/crosstool/BUILD.rocm.tpl | 2 +- .../preconfig/ubuntu16.04/gcc5-rocm/BUILD | 2 +- 13 files changed, 124 insertions(+), 36 deletions(-) diff --git a/tensorflow/core/kernels/training_ops.cc b/tensorflow/core/kernels/training_ops.cc index 3490dc1ee80475..467087b786423d 100644 --- a/tensorflow/core/kernels/training_ops.cc +++ b/tensorflow/core/kernels/training_ops.cc @@ -670,7 +670,9 @@ namespace functor { DECLARE_GPU_SPEC(Eigen::half); DECLARE_GPU_SPEC(float); DECLARE_GPU_SPEC(double); -#ifndef TENSORFLOW_USE_NVCC // TODO(b/143684500): Eigen to support complex 
sqrt +#if !defined(TENSORFLOW_USE_NVCC) && \ + !defined(TENSORFLOW_USE_ROCM) // TODO(b/143684500): Eigen to support + // complex sqrt #ifndef PLATFORM_WINDOWS DECLARE_GPU_SPEC(complex64); DECLARE_GPU_SPEC(complex128); @@ -682,7 +684,9 @@ DECLARE_GPU_SPEC(complex128); REGISTER_KERNELS(GPU, Eigen::half); REGISTER_KERNELS(GPU, float); REGISTER_KERNELS(GPU, double); -#ifndef TENSORFLOW_USE_NVCC // TODO(b/143684500): Eigen to support complex sqrt +#if !defined(TENSORFLOW_USE_NVCC) && \ + !defined(TENSORFLOW_USE_ROCM) // TODO(b/143684500): Eigen to support + // complex sqrt #ifndef PLATFORM_WINDOWS REGISTER_KERNELS(GPU, complex64); REGISTER_KERNELS(GPU, complex128); @@ -849,7 +853,9 @@ namespace functor { DECLARE_GPU_SPEC(Eigen::half); DECLARE_GPU_SPEC(float); DECLARE_GPU_SPEC(double); -#ifndef TENSORFLOW_USE_NVCC // TODO(b/143684500): Eigen to support complex sqrt +#if !defined(TENSORFLOW_USE_NVCC) && \ + !defined(TENSORFLOW_USE_ROCM) // TODO(b/143684500): Eigen to support + // complex sqrt #ifndef PLATFORM_WINDOWS DECLARE_GPU_SPEC(complex64); DECLARE_GPU_SPEC(complex128); @@ -861,7 +867,9 @@ DECLARE_GPU_SPEC(complex128); REGISTER_KERNELS(GPU, Eigen::half); REGISTER_KERNELS(GPU, float); REGISTER_KERNELS(GPU, double); -#ifndef TENSORFLOW_USE_NVCC // TODO(b/143684500): Eigen to support complex sqrt +#if !defined(TENSORFLOW_USE_NVCC) && \ + !defined(TENSORFLOW_USE_ROCM) // TODO(b/143684500): Eigen to support + // complex sqrt #ifndef PLATFORM_WINDOWS REGISTER_KERNELS(GPU, complex64); REGISTER_KERNELS(GPU, complex128); @@ -1340,7 +1348,9 @@ namespace functor { DECLARE_GPU_SPEC(Eigen::half); DECLARE_GPU_SPEC(float); DECLARE_GPU_SPEC(double); -#ifndef TENSORFLOW_USE_NVCC // TODO(b/143684500): Eigen to support complex sqrt +#if !defined(TENSORFLOW_USE_NVCC) && \ + !defined(TENSORFLOW_USE_ROCM) // TODO(b/143684500): Eigen to support + // complex sqrt #ifndef PLATFORM_WINDOWS DECLARE_GPU_SPEC(complex64); DECLARE_GPU_SPEC(complex128); @@ -1352,7 +1362,9 @@ 
DECLARE_GPU_SPEC(complex128); REGISTER_KERNELS(GPU, Eigen::half); REGISTER_KERNELS(GPU, float); REGISTER_KERNELS(GPU, double); -#ifndef TENSORFLOW_USE_NVCC // TODO(b/143684500): Eigen to support complex sqrt +#if !defined(TENSORFLOW_USE_NVCC) && \ + !defined(TENSORFLOW_USE_ROCM) // TODO(b/143684500): Eigen to support + // complex sqrt #ifndef PLATFORM_WINDOWS REGISTER_KERNELS(GPU, complex64); REGISTER_KERNELS(GPU, complex128); @@ -1456,7 +1468,9 @@ namespace functor { DECLARE_GPU_SPEC(Eigen::half); DECLARE_GPU_SPEC(float); DECLARE_GPU_SPEC(double); -#ifndef TENSORFLOW_USE_NVCC // TODO(b/143684500): Eigen to support complex sqrt +#if !defined(TENSORFLOW_USE_NVCC) && \ + !defined(TENSORFLOW_USE_ROCM) // TODO(b/143684500): Eigen to support + // complex sqrt #ifndef PLATFORM_WINDOWS DECLARE_GPU_SPEC(complex64); DECLARE_GPU_SPEC(complex128); @@ -1468,7 +1482,9 @@ DECLARE_GPU_SPEC(complex128); REGISTER_KERNELS(GPU, Eigen::half); REGISTER_KERNELS(GPU, float); REGISTER_KERNELS(GPU, double); -#ifndef TENSORFLOW_USE_NVCC // TODO(b/143684500): Eigen to support complex sqrt +#if !defined(TENSORFLOW_USE_NVCC) && \ + !defined(TENSORFLOW_USE_ROCM) // TODO(b/143684500): Eigen to support + // complex sqrt #ifndef PLATFORM_WINDOWS REGISTER_KERNELS(GPU, complex64); REGISTER_KERNELS(GPU, complex128); @@ -2957,7 +2973,9 @@ namespace functor { DECLARE_GPU_SPEC(Eigen::half); DECLARE_GPU_SPEC(float); DECLARE_GPU_SPEC(double); -#ifndef TENSORFLOW_USE_NVCC // TODO(b/143684500): Eigen to support complex sqrt +#if !defined(TENSORFLOW_USE_NVCC) && \ + !defined(TENSORFLOW_USE_ROCM) // TODO(b/143684500): Eigen to support + // complex sqrt #ifndef PLATFORM_WINDOWS DECLARE_GPU_SPEC(complex64); DECLARE_GPU_SPEC(complex128); @@ -2969,7 +2987,9 @@ DECLARE_GPU_SPEC(complex128); REGISTER_KERNELS(GPU, Eigen::half); REGISTER_KERNELS(GPU, float); REGISTER_KERNELS(GPU, double); -#ifndef TENSORFLOW_USE_NVCC // TODO(b/143684500): Eigen to support complex sqrt +#if !defined(TENSORFLOW_USE_NVCC) && \ + 
!defined(TENSORFLOW_USE_ROCM) // TODO(b/143684500): Eigen to support + // complex sqrt #ifndef PLATFORM_WINDOWS REGISTER_KERNELS(GPU, complex64); REGISTER_KERNELS(GPU, complex128); @@ -3195,7 +3215,9 @@ namespace functor { DECLARE_GPU_SPEC(Eigen::half); DECLARE_GPU_SPEC(float); DECLARE_GPU_SPEC(double); -#ifndef TENSORFLOW_USE_NVCC // TODO(b/143684500): Eigen to support complex sqrt +#if !defined(TENSORFLOW_USE_NVCC) && \ + !defined(TENSORFLOW_USE_ROCM) // TODO(b/143684500): Eigen to support + // complex sqrt #ifndef PLATFORM_WINDOWS DECLARE_GPU_SPEC(complex64); DECLARE_GPU_SPEC(complex128); @@ -3207,7 +3229,9 @@ DECLARE_GPU_SPEC(complex128); REGISTER_KERNELS(GPU, Eigen::half); REGISTER_KERNELS(GPU, float); REGISTER_KERNELS(GPU, double); -#ifndef TENSORFLOW_USE_NVCC // TODO(b/143684500): Eigen to support complex sqrt +#if !defined(TENSORFLOW_USE_NVCC) && \ + !defined(TENSORFLOW_USE_ROCM) // TODO(b/143684500): Eigen to support + // complex sqrt #ifndef PLATFORM_WINDOWS REGISTER_KERNELS(GPU, complex64); REGISTER_KERNELS(GPU, complex128); @@ -3337,7 +3361,9 @@ DECLARE_GPU_SPEC(float, int32); DECLARE_GPU_SPEC(float, int64); DECLARE_GPU_SPEC(double, int32); DECLARE_GPU_SPEC(double, int64); -#ifndef TENSORFLOW_USE_NVCC // TODO(b/143684500): Eigen to support complex sqrt +#if !defined(TENSORFLOW_USE_NVCC) && \ + !defined(TENSORFLOW_USE_ROCM) // TODO(b/143684500): Eigen to support + // complex sqrt #ifndef PLATFORM_WINDOWS DECLARE_GPU_SPEC(complex64, int32); DECLARE_GPU_SPEC(complex64, int64); @@ -3355,7 +3381,9 @@ DECLARE_GPU_SPEC(complex128, int64); REGISTER_GPU_KERNELS(Eigen::half); REGISTER_GPU_KERNELS(float); REGISTER_GPU_KERNELS(double); -#ifndef TENSORFLOW_USE_NVCC // TODO(b/143684500): Eigen to support complex sqrt +#if !defined(TENSORFLOW_USE_NVCC) && \ + !defined(TENSORFLOW_USE_ROCM) // TODO(b/143684500): Eigen to support + // complex sqrt #ifndef PLATFORM_WINDOWS REGISTER_GPU_KERNELS(complex64); REGISTER_GPU_KERNELS(complex128); @@ -3622,7 +3650,9 @@ namespace 
functor { DECLARE_GPU_SPEC(Eigen::half); DECLARE_GPU_SPEC(float); DECLARE_GPU_SPEC(double); -#ifndef TENSORFLOW_USE_NVCC // TODO(b/143684500): Eigen to support complex sqrt +#if !defined(TENSORFLOW_USE_NVCC) && \ + !defined(TENSORFLOW_USE_ROCM) // TODO(b/143684500): Eigen to support + // complex sqrt #ifndef PLATFORM_WINDOWS DECLARE_GPU_SPEC(complex64); DECLARE_GPU_SPEC(complex128); @@ -3634,7 +3664,9 @@ DECLARE_GPU_SPEC(complex128); REGISTER_KERNELS(GPU, Eigen::half); REGISTER_KERNELS(GPU, float); REGISTER_KERNELS(GPU, double); -#ifndef TENSORFLOW_USE_NVCC // TODO(b/143684500): Eigen to support complex sqrt +#if !defined(TENSORFLOW_USE_NVCC) && \ + !defined(TENSORFLOW_USE_ROCM) // TODO(b/143684500): Eigen to support + // complex sqrt #ifndef PLATFORM_WINDOWS REGISTER_KERNELS(GPU, complex64); REGISTER_KERNELS(GPU, complex128); @@ -4151,7 +4183,9 @@ namespace functor { DECLARE_GPU_SPEC(Eigen::half); DECLARE_GPU_SPEC(float); DECLARE_GPU_SPEC(double); -#ifndef TENSORFLOW_USE_NVCC // TODO(b/143684500): Eigen to support complex sqrt +#if !defined(TENSORFLOW_USE_NVCC) && \ + !defined(TENSORFLOW_USE_ROCM) // TODO(b/143684500): Eigen to support + // complex sqrt #ifndef PLATFORM_WINDOWS DECLARE_GPU_SPEC(complex64); DECLARE_GPU_SPEC(complex128); @@ -4163,7 +4197,9 @@ DECLARE_GPU_SPEC(complex128); REGISTER_KERNELS(GPU, Eigen::half); REGISTER_KERNELS(GPU, float); REGISTER_KERNELS(GPU, double); -#ifndef TENSORFLOW_USE_NVCC // TODO(b/143684500): Eigen to support complex sqrt +#if !defined(TENSORFLOW_USE_NVCC) && \ + !defined(TENSORFLOW_USE_ROCM) // TODO(b/143684500): Eigen to support + // complex sqrt #ifndef PLATFORM_WINDOWS REGISTER_KERNELS(GPU, complex64); REGISTER_KERNELS(GPU, complex128); diff --git a/tensorflow/core/kernels/training_ops_gpu.cu.cc b/tensorflow/core/kernels/training_ops_gpu.cu.cc index 0995b31e734751..8b7f5dc2e40ef3 100644 --- a/tensorflow/core/kernels/training_ops_gpu.cu.cc +++ b/tensorflow/core/kernels/training_ops_gpu.cu.cc @@ -524,7 +524,9 @@ struct 
ApplyPowerSign { template struct functor::ApplyGradientDescent; template struct functor::ApplyGradientDescent; template struct functor::ApplyGradientDescent; -#ifndef TENSORFLOW_USE_NVCC // TODO(b/143684500): Eigen to support complex sqrt +#if !defined(TENSORFLOW_USE_NVCC) && \ + !defined(TENSORFLOW_USE_ROCM) // TODO(b/143684500): Eigen to support + // complex sqrt #ifndef PLATFORM_WINDOWS template struct functor::ApplyGradientDescent; template struct functor::ApplyGradientDescent; @@ -534,7 +536,9 @@ template struct functor::ApplyGradientDescent; template struct functor::ApplyAdagrad; template struct functor::ApplyAdagrad; template struct functor::ApplyAdagrad; -#ifndef TENSORFLOW_USE_NVCC // TODO(b/143684500): Eigen to support complex sqrt +#if !defined(TENSORFLOW_USE_NVCC) && \ + !defined(TENSORFLOW_USE_ROCM) // TODO(b/143684500): Eigen to support + // complex sqrt #ifndef PLATFORM_WINDOWS template struct functor::ApplyAdagrad; template struct functor::ApplyAdagrad; @@ -544,7 +548,9 @@ template struct functor::ApplyAdagrad; template struct functor::ApplyAdagradV2; template struct functor::ApplyAdagradV2; template struct functor::ApplyAdagradV2; -#ifndef TENSORFLOW_USE_NVCC // TODO(b/143684500): Eigen to support complex sqrt +#if !defined(TENSORFLOW_USE_NVCC) && \ + !defined(TENSORFLOW_USE_ROCM) // TODO(b/143684500): Eigen to support + // complex sqrt #ifndef PLATFORM_WINDOWS template struct functor::ApplyAdagradV2; template struct functor::ApplyAdagradV2; @@ -554,7 +560,9 @@ template struct functor::ApplyAdagradV2; template struct functor::ApplyAdadelta; template struct functor::ApplyAdadelta; template struct functor::ApplyAdadelta; -#ifndef TENSORFLOW_USE_NVCC // TODO(b/143684500): Eigen to support complex sqrt +#if !defined(TENSORFLOW_USE_NVCC) && \ + !defined(TENSORFLOW_USE_ROCM) // TODO(b/143684500): Eigen to support + // complex sqrt #ifndef PLATFORM_WINDOWS template struct functor::ApplyAdadelta; template struct functor::ApplyAdadelta; @@ -572,7 +580,9 @@ 
template struct functor::ApplyFtrlV2; template struct functor::ApplyMomentum; template struct functor::ApplyMomentum; template struct functor::ApplyMomentum; -#ifndef TENSORFLOW_USE_NVCC // TODO(b/143684500): Eigen to support complex sqrt +#if !defined(TENSORFLOW_USE_NVCC) && \ + !defined(TENSORFLOW_USE_ROCM) // TODO(b/143684500): Eigen to support + // complex sqrt #ifndef PLATFORM_WINDOWS template struct functor::ApplyMomentum; template struct functor::ApplyMomentum; @@ -582,7 +592,9 @@ template struct functor::ApplyMomentum; template struct functor::ApplyKerasMomentum; template struct functor::ApplyKerasMomentum; template struct functor::ApplyKerasMomentum; -#ifndef TENSORFLOW_USE_NVCC // TODO(b/143684500): Eigen to support complex sqrt +#if !defined(TENSORFLOW_USE_NVCC) && \ + !defined(TENSORFLOW_USE_ROCM) // TODO(b/143684500): Eigen to support + // complex sqrt #ifndef PLATFORM_WINDOWS template struct functor::ApplyKerasMomentum; template struct functor::ApplyKerasMomentum; @@ -597,7 +609,9 @@ template struct functor::SparseApplyKerasMomentum; template struct functor::SparseApplyKerasMomentum; template struct functor::SparseApplyKerasMomentum; template struct functor::SparseApplyKerasMomentum; -#ifndef TENSORFLOW_USE_NVCC // TODO(b/143684500): Eigen to support complex sqrt +#if !defined(TENSORFLOW_USE_NVCC) && \ + !defined(TENSORFLOW_USE_ROCM) // TODO(b/143684500): Eigen to support + // complex sqrt #ifndef PLATFORM_WINDOWS template struct functor::SparseApplyKerasMomentum; template struct functor::SparseApplyKerasMomentum; @@ -609,7 +623,9 @@ template struct functor::SparseApplyKerasMomentum; template struct functor::ApplyAdam; template struct functor::ApplyAdam; template struct functor::ApplyAdam; -#ifndef TENSORFLOW_USE_NVCC // TODO(b/143684500): Eigen to support complex sqrt +#if !defined(TENSORFLOW_USE_NVCC) && \ + !defined(TENSORFLOW_USE_ROCM) // TODO(b/143684500): Eigen to support + // complex sqrt #ifndef PLATFORM_WINDOWS template struct 
functor::ApplyAdam; template struct functor::ApplyAdam; @@ -627,7 +643,9 @@ template struct functor::ApplyAdaMax; template struct functor::ApplyRMSProp; template struct functor::ApplyRMSProp; template struct functor::ApplyRMSProp; -#ifndef TENSORFLOW_USE_NVCC // TODO(b/143684500): Eigen to support complex sqrt +#if !defined(TENSORFLOW_USE_NVCC) && \ + !defined(TENSORFLOW_USE_ROCM) // TODO(b/143684500): Eigen to support + // complex sqrt #ifndef PLATFORM_WINDOWS template struct functor::ApplyRMSProp; template struct functor::ApplyRMSProp; @@ -637,7 +655,9 @@ template struct functor::ApplyRMSProp; template struct functor::ApplyCenteredRMSProp; template struct functor::ApplyCenteredRMSProp; template struct functor::ApplyCenteredRMSProp; -#ifndef TENSORFLOW_USE_NVCC // TODO(b/143684500): Eigen to support complex sqrt +#if !defined(TENSORFLOW_USE_NVCC) && \ + !defined(TENSORFLOW_USE_ROCM) // TODO(b/143684500): Eigen to support + // complex sqrt #ifndef PLATFORM_WINDOWS template struct functor::ApplyCenteredRMSProp; template struct functor::ApplyCenteredRMSProp; diff --git a/tensorflow/python/eager/forwardprop_test.py b/tensorflow/python/eager/forwardprop_test.py index b214c908c37100..10cd14b6a61dc9 100644 --- a/tensorflow/python/eager/forwardprop_test.py +++ b/tensorflow/python/eager/forwardprop_test.py @@ -469,6 +469,13 @@ def _bn_fused(x_arg, scale_arg, offset_arg): atol=1e-3) def testFusedBatchNormGradsInference(self): + + if test.is_built_with_rocm(): + # This test was addeded recently and has been failing on the ROCm + # platform, since it was added. + # TODO(rocm): do root cause analysis of test failure and fix it. + self.skipTest("Test fails on ROCm platform, needs further analysis") + x_shape = [4, 10, 10, 2] increment = 3. 
/ math_ops.reduce_prod( constant_op.constant(x_shape, dtype=dtypes.float32)) diff --git a/tensorflow/python/keras/layers/image_preprocessing_test.py b/tensorflow/python/keras/layers/image_preprocessing_test.py index d33acbf0de7a6b..c25435a3d28ebb 100644 --- a/tensorflow/python/keras/layers/image_preprocessing_test.py +++ b/tensorflow/python/keras/layers/image_preprocessing_test.py @@ -187,6 +187,11 @@ def test_invalid_random_crop(self, expected_height, expected_width): self._run_test(expected_height, expected_width) def test_training_with_mock(self): + if test.is_built_with_rocm(): + # TODO(rocm): + # re-enable this test once ROCm adds support for + # the StatefulUniformFullInt Op (on the GPU) + self.skipTest("Feature not supported on ROCm") np.random.seed(1337) height, width = 3, 4 height_offset = np.random.randint(low=0, high=3) @@ -207,6 +212,11 @@ def test_training_with_mock(self): ('random_crop_4_by_6', 4, 6), ('random_crop_3_by_2', 3, 2)) def test_random_crop_output_shape(self, expected_height, expected_width): + if test.is_built_with_rocm(): + # TODO(rocm): + # re-enable this test once ROCm adds support for + # the StatefulUniformFullInt Op (on the GPU) + self.skipTest("Feature not supported on ROCm") with CustomObjectScope({'RandomCrop': image_preprocessing.RandomCrop}): self._run_test(expected_height, expected_width) diff --git a/tensorflow/python/keras/optimizer_v2/adadelta_test.py b/tensorflow/python/keras/optimizer_v2/adadelta_test.py index 4dad9198b8531c..5ff9a563f497e1 100644 --- a/tensorflow/python/keras/optimizer_v2/adadelta_test.py +++ b/tensorflow/python/keras/optimizer_v2/adadelta_test.py @@ -35,7 +35,8 @@ _DATA_TYPES = [dtypes.half, dtypes.float32, dtypes.float64] # TODO(b/143684500): Eigen to support complex sqrt -if not test_util.IsBuiltWithNvcc() and platform.system() != "Windows": +if not test_util.IsBuiltWithNvcc() and platform.system() != "Windows" \ + and not test.is_built_with_rocm(): _DATA_TYPES += [dtypes.complex64, dtypes.complex128] 
diff --git a/tensorflow/python/keras/optimizer_v2/adagrad_test.py b/tensorflow/python/keras/optimizer_v2/adagrad_test.py index b0b661da8f78ee..c8e49a003d8cb1 100644 --- a/tensorflow/python/keras/optimizer_v2/adagrad_test.py +++ b/tensorflow/python/keras/optimizer_v2/adagrad_test.py @@ -38,7 +38,8 @@ _DATA_TYPES = [dtypes.half, dtypes.float32, dtypes.float64] # TODO(b/143684500): Eigen to support complex sqrt -if not test_util.IsBuiltWithNvcc() and platform.system() != "Windows": +if not test_util.IsBuiltWithNvcc() and platform.system() != "Windows" \ + and not test.is_built_with_rocm(): _DATA_TYPES += [dtypes.complex64, dtypes.complex128] diff --git a/tensorflow/python/keras/optimizer_v2/rmsprop_test.py b/tensorflow/python/keras/optimizer_v2/rmsprop_test.py index 0482b6f00b7757..1a525004c374c4 100644 --- a/tensorflow/python/keras/optimizer_v2/rmsprop_test.py +++ b/tensorflow/python/keras/optimizer_v2/rmsprop_test.py @@ -41,7 +41,8 @@ _DATA_TYPES = [dtypes.half, dtypes.float32, dtypes.float64] # TODO(b/143684500): Eigen to support complex sqrt -if not test_util.IsBuiltWithNvcc() and platform.system() != "Windows": +if not test_util.IsBuiltWithNvcc() and platform.system() != "Windows" \ + and not test.is_built_with_rocm(): _DATA_TYPES += [dtypes.complex64, dtypes.complex128] _TEST_PARAM_VALUES = [ diff --git a/tensorflow/python/ops/special_math_ops_test.py b/tensorflow/python/ops/special_math_ops_test.py index 6582f37d65be87..7ae9e22858bb12 100644 --- a/tensorflow/python/ops/special_math_ops_test.py +++ b/tensorflow/python/ops/special_math_ops_test.py @@ -436,6 +436,16 @@ def test_opt_einsum_cached(self): with test.mock.patch.object( opt_einsum, 'contract_path', wraps=opt_einsum.contract_path) as mock_contract_path: + + # explicitly clear the lru_cache contents for the method + # special_math_ops.get_opt_einsum_contract_path + # We need to do this because other tests in this file invoke that method + # with the same input args (as input_1 and input_2 above), and if + 
# those tests run before this test, then the call_count for the method + # mock_contract_path will not increment. + if six.PY3: + special_math_ops._get_opt_einsum_contract_path.cache_clear() + self.assertEqual(mock_contract_path.call_count, 0) self._check(*input_1) self.assertEqual(mock_contract_path.call_count, 1) diff --git a/tensorflow/tools/ci_build/Dockerfile.rocm b/tensorflow/tools/ci_build/Dockerfile.rocm index 191947da7a74d7..a083bc6debd9e6 100644 --- a/tensorflow/tools/ci_build/Dockerfile.rocm +++ b/tensorflow/tools/ci_build/Dockerfile.rocm @@ -3,7 +3,7 @@ FROM ubuntu:xenial MAINTAINER Jeff Poznanovic -ARG DEB_ROCM_REPO=http://repo.radeon.com/rocm/apt/2.6/ +ARG DEB_ROCM_REPO=http://repo.radeon.com/rocm/apt/2.8.0/ ARG ROCM_PATH=/opt/rocm ENV DEBIAN_FRONTEND noninteractive diff --git a/tensorflow/tools/ci_build/builds/docker_test.sh b/tensorflow/tools/ci_build/builds/docker_test.sh index 38891b60e57676..39e119f889537e 100755 --- a/tensorflow/tools/ci_build/builds/docker_test.sh +++ b/tensorflow/tools/ci_build/builds/docker_test.sh @@ -109,7 +109,8 @@ if [ "${IMAGE_TYPE}" == "gpu" ]; then libs=$(\ls /usr/lib/x86_64-linux-gnu/libcuda.* | xargs -I{} echo '-v {}:{}') GPU_EXTRA_PARAMS="${devices} ${libs}" elif [ "${IMAGE_TYPE}" == "rocm" ]; then - ROCM_EXTRA_PARAMS="--device=/dev/kfd --device=/dev/dri --group-add video" + ROCM_EXTRA_PARAMS="--device=/dev/kfd --device=/dev/dri --group-add video \ + --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --shm-size 16G" else GPU_EXTRA_PARAMS="" ROCM_EXTRA_PARAMS="" diff --git a/tensorflow/tools/ci_build/ci_build.sh b/tensorflow/tools/ci_build/ci_build.sh index 079765bd5f9d22..d41972f4e1a9ba 100755 --- a/tensorflow/tools/ci_build/ci_build.sh +++ b/tensorflow/tools/ci_build/ci_build.sh @@ -111,7 +111,8 @@ fi # Add extra params for rocm devices and libraries for ROCm container. 
if [[ "${CONTAINER_TYPE}" == "rocm" ]]; then - ROCM_EXTRA_PARAMS="--device=/dev/kfd --device=/dev/dri --group-add video" + ROCM_EXTRA_PARAMS="--device=/dev/kfd --device=/dev/dri --group-add video \ + --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --shm-size 16G" else ROCM_EXTRA_PARAMS="" fi diff --git a/third_party/gpus/crosstool/BUILD.rocm.tpl b/third_party/gpus/crosstool/BUILD.rocm.tpl index be32aa7f808c34..8ca69e117c8486 100644 --- a/third_party/gpus/crosstool/BUILD.rocm.tpl +++ b/third_party/gpus/crosstool/BUILD.rocm.tpl @@ -90,7 +90,7 @@ cc_toolchain_config( "-lm", ], link_libs = [], - opt_link_flags = ["-Wl,--gc-sections"], + opt_link_flags = [], unfiltered_compile_flags = [ "-fno-canonical-system-headers", "-Wno-builtin-macro-redefined", diff --git a/third_party/toolchains/preconfig/ubuntu16.04/gcc5-rocm/BUILD b/third_party/toolchains/preconfig/ubuntu16.04/gcc5-rocm/BUILD index 00daa6042acd97..e1b16d20dbe397 100755 --- a/third_party/toolchains/preconfig/ubuntu16.04/gcc5-rocm/BUILD +++ b/third_party/toolchains/preconfig/ubuntu16.04/gcc5-rocm/BUILD @@ -122,7 +122,7 @@ cc_toolchain_config( "-ffunction-sections", "-fdata-sections", ], - opt_link_flags = ["-Wl,--gc-sections"], + opt_link_flags = [], supports_start_end_lib = True, target_libc = "local", target_system_name = "local", From 3bfa70b58dc072054074c6ca1f50e3f87e973a2f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 18 Nov 2019 14:40:19 -0800 Subject: [PATCH 033/130] Reduced tolerance of ExponentialOpTest. 
PiperOrigin-RevId: 281156604 Change-Id: I57fae6b19444a5e4ccf4a731ffe6722269fda4c4 --- tensorflow/python/kernel_tests/matrix_exponential_op_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/kernel_tests/matrix_exponential_op_test.py b/tensorflow/python/kernel_tests/matrix_exponential_op_test.py index 520e4d3eb8de1c..ed47e8980d993e 100644 --- a/tensorflow/python/kernel_tests/matrix_exponential_op_test.py +++ b/tensorflow/python/kernel_tests/matrix_exponential_op_test.py @@ -63,7 +63,7 @@ def _verifyExponential(self, x, np_type): else: np_ans = np_expm(inp) out = self.evaluate(tf_ans) - self.assertAllClose(np_ans, out, rtol=1e-4, atol=1e-3) + self.assertAllClose(np_ans, out, rtol=1e-3, atol=1e-3) def _verifyExponentialReal(self, x): for np_type in [np.float32, np.float64]: From bf54ee5b8db2ba42787f51ba28413dd801b65417 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 18 Nov 2019 16:18:17 -0800 Subject: [PATCH 034/130] Remove the assert on no new tensors. 
PiperOrigin-RevId: 281177959 Change-Id: I7e524588b13b348913db9d32589a96d5015b99d7 --- tensorflow/python/eager/backprop_test.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tensorflow/python/eager/backprop_test.py b/tensorflow/python/eager/backprop_test.py index 8e3efbf1afc02a..23cfbd44972db8 100644 --- a/tensorflow/python/eager/backprop_test.py +++ b/tensorflow/python/eager/backprop_test.py @@ -910,7 +910,6 @@ def testUnconnectedGradientsVariablesZeros(self): dz_dx = g.gradient(z, x, unconnected_gradients='zero') self.assertAllEqual([[0.0, 0.0], [0.0, 0.0]], self.evaluate(dz_dx)) - @test_util.assert_no_new_tensors @test_util.run_in_graph_and_eager_modes def testUnknownUnconnectedGradientsValueGiven(self): x = constant_op.constant(1.0) From cd6be9fd8ae7309b1b697235dbd7e30dfe71e2fa Mon Sep 17 00:00:00 2001 From: Jared Duke Date: Fri, 15 Nov 2019 17:09:35 -0800 Subject: [PATCH 035/130] Switch to NDK API level 21 PiperOrigin-RevId: 280766624 Change-Id: I8500b69a5f6bebbeb0aafcf5744f5be5944738b9 --- configure.py | 2 +- tensorflow/lite/g3doc/guide/android.md | 2 +- tensorflow/tools/ci_build/Dockerfile.android | 2 +- tensorflow/tools/ci_build/builds/builds_common.sh | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/configure.py b/configure.py index d29f3d4464979c..79a43735adaf80 100644 --- a/configure.py +++ b/configure.py @@ -817,7 +817,7 @@ def valid_api_level(api_level): android_ndk_api_level = prompt_loop_or_load_from_env( environ_cp, var_name='ANDROID_NDK_API_LEVEL', - var_default='18', # 18 is required for GPU acceleration. + var_default='21', # 21 is required for ARM64 support. ask_for_var=('Please specify the (min) Android NDK API level to use. 
' '[Available levels: %s]') % api_levels, check_success=valid_api_level, diff --git a/tensorflow/lite/g3doc/guide/android.md b/tensorflow/lite/g3doc/guide/android.md index 8ebc859f846f9a..8b8324aa8d6e8f 100644 --- a/tensorflow/lite/g3doc/guide/android.md +++ b/tensorflow/lite/g3doc/guide/android.md @@ -132,7 +132,7 @@ in the `.tf_configure.bazelrc` file in the root folder: ```shell build --action_env ANDROID_NDK_HOME="/usr/local/android/android-ndk-r17c" -build --action_env ANDROID_NDK_API_LEVEL="18" +build --action_env ANDROID_NDK_API_LEVEL="21" build --action_env ANDROID_BUILD_TOOLS_VERSION="28.0.3" build --action_env ANDROID_SDK_API_LEVEL="23" build --action_env ANDROID_SDK_HOME="/usr/local/android/android-sdk-linux" diff --git a/tensorflow/tools/ci_build/Dockerfile.android b/tensorflow/tools/ci_build/Dockerfile.android index 81e9077cd0ace5..80949ac64ebcdf 100644 --- a/tensorflow/tools/ci_build/Dockerfile.android +++ b/tensorflow/tools/ci_build/Dockerfile.android @@ -29,7 +29,7 @@ RUN mkdir -p ${ANDROID_DEV_HOME} ENV ANDROID_SDK_FILENAME tools_r25.2.5-linux.zip ENV ANDROID_SDK_URL https://dl.google.com/android/repository/${ANDROID_SDK_FILENAME} ENV ANDROID_API_LEVEL 23 -ENV ANDROID_NDK_API_LEVEL 18 +ENV ANDROID_NDK_API_LEVEL 21 # Build Tools Version liable to change. 
ENV ANDROID_BUILD_TOOLS_VERSION 28.0.0 ENV ANDROID_SDK_HOME ${ANDROID_DEV_HOME}/sdk diff --git a/tensorflow/tools/ci_build/builds/builds_common.sh b/tensorflow/tools/ci_build/builds/builds_common.sh index 55a4ac800f39b3..8b0c065a9e3702 100644 --- a/tensorflow/tools/ci_build/builds/builds_common.sh +++ b/tensorflow/tools/ci_build/builds/builds_common.sh @@ -235,7 +235,7 @@ android_sdk_repository( android_ndk_repository( name="androidndk", path="${ANDROID_NDK_HOME}", - api_level=18) + api_level=21) EOF fi fi From ab9f302858c1a8f7f618c950d3a4c5cfb6a6b62e Mon Sep 17 00:00:00 2001 From: Austin Anderson Date: Tue, 19 Nov 2019 17:04:19 -0800 Subject: [PATCH 036/130] Support /d2ReducedOptimizeHugeFunctions on Windows when TF_VC_VERSION=16.4 See https://groups.google.com/a/tensorflow.org/g/build/c/SsW98Eo7l3o PiperOrigin-RevId: 281412015 Change-Id: Icdea77fe0677ad6b0e1c5cf8f053a81a14cb402e --- configure.py | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/configure.py b/configure.py index d29f3d4464979c..a5f741c4108faf 100644 --- a/configure.py +++ b/configure.py @@ -1196,6 +1196,34 @@ def set_system_libs_flag(environ_cp): write_to_bazelrc('build --define=INCLUDEDIR=%s' % environ_cp['INCLUDEDIR']) +def is_reduced_optimize_huge_functions_available(environ_cp): + """Check to see if the system supports /d2ReducedOptimizeHugeFunctions. + + The above compiler flag is a new compiler flag introduced to the Visual Studio + compiler in version 16.4 (available in Visual Studio 2019, Preview edition + only, as of 2019-11-19). TensorFlow needs this flag to massively reduce + compile times, but until 16.4 is officially released, we can't depend on it. 
+ + See also https://groups.google.com/a/tensorflow.org/g/build/c/SsW98Eo7l3o + + Because it's very annoying to check this manually (to check the MSVC installed + versions, you need to use the registry, and it's not clear if Bazel will be + using that install version anyway), we expect enviroments who know they may + use this flag to export TF_VC_VERSION=16.4 + + TODO(angerson, gunan): Remove this function when TensorFlow's minimum VS + version is upgraded to 16.4. + + Arguments: + environ_cp: Environment of the current execution + + Returns: + boolean, whether or not /d2ReducedOptimizeHugeFunctions is available on this + machine. + """ + return float(environ_cp.get('TF_VC_VERSION', '0')) >= 16.4 + + def set_windows_build_flags(environ_cp): """Set Windows specific build options.""" # The non-monolithic build is not supported yet @@ -1212,6 +1240,11 @@ def set_windows_build_flags(environ_cp): # have to distinct them. This avoids building the same targets twice. write_to_bazelrc('build --distinct_host_configuration=false') + if is_reduced_optimize_huge_functions_available(environ_cp): + write_to_bazelrc( + 'build --copt=/d2ReducedOptimizeHugeFunctions --host_copt=/d2ReducedOptimizeHugeFunctions' + ) + if get_var( environ_cp, 'TF_OVERRIDE_EIGEN_STRONG_INLINE', 'Eigen strong inline', True, ('Would you like to override eigen strong inline for some C++ ' From bc17766252da92b570dcbd67e2e1dde0057ed230 Mon Sep 17 00:00:00 2001 From: Yifei Feng Date: Wed, 20 Nov 2019 16:53:19 -0800 Subject: [PATCH 037/130] Allow building two pip cpu packages on macos. Properly build both packages instead renaming. 
PiperOrigin-RevId: 281635032 Change-Id: I3577d1f4f213e1d1daf4caa757506be48d63972c --- tensorflow/tools/ci_build/builds/pip_new.sh | 46 +++++++++++++++---- .../release/macos/cpu_py2_full/pip.sh | 3 +- .../release/macos/cpu_py35_full/pip.sh | 3 +- .../release/macos/cpu_py36_full/pip.sh | 3 +- .../release/macos/cpu_py37_full/pip.sh | 3 +- 5 files changed, 44 insertions(+), 14 deletions(-) diff --git a/tensorflow/tools/ci_build/builds/pip_new.sh b/tensorflow/tools/ci_build/builds/pip_new.sh index 932d3e8f60c55e..ace5c014865fc7 100755 --- a/tensorflow/tools/ci_build/builds/pip_new.sh +++ b/tensorflow/tools/ci_build/builds/pip_new.sh @@ -60,7 +60,13 @@ # and tensorflow-gpu pip package. Will # automatically handle adding/removing of _gpu # suffix depending on what project name was -# passed. +# passed. Only work for Ubuntu. +# TF_BUILD_BOTH_CPU_PACKAGES: (1 | 0) +# 1 will build both tensorflow (no gpu support) +# and tensorflow-cpu pip package. Will +# automatically handle adding/removing of _cpu +# suffix depending on what project name was +# passed. Only work for MacOS # # To-be-deprecated variable(s). # GIT_TAG_OVERRIDE: Values for `--git_tag_override`. 
This flag gets passed @@ -241,11 +247,13 @@ DEFAULT_PIP_TESTS="" # Do not run any tests by default DEFAULT_PROJECT_NAME="tensorflow" DEFAULT_PIP_TEST_ROOT="pip_test" DEFAULT_BUILD_BOTH_GPU_PACKAGES=0 +DEFAULT_BUILD_BOTH_CPU_PACKAGES=0 # Take in optional global variables PIP_TESTS=${TF_PIP_TESTS:-$DEFAULT_PIP_TESTS} PROJECT_NAME=${TF_PROJECT_NAME:-$DEFAULT_PROJECT_NAME} PIP_TEST_ROOT=${TF_PIP_TEST_ROOT:-$DEFAULT_PIP_TEST_ROOT} BUILD_BOTH_GPU_PACKAGES=${TF_BUILD_BOTH_GPU_PACKAGES:-$DEFAULT_BUILD_BOTH_GPU_PACKAGES} +BUILD_BOTH_CPU_PACKAGES=${TF_BUILD_BOTH_CPU_PACKAGES:-$DEFAULT_BUILD_BOTH_CPU_PACKAGES} # Local variables PIP_WHL_DIR="${KOKORO_ARTIFACTS_DIR}/tensorflow/${PIP_TEST_ROOT}/whl" @@ -640,20 +648,38 @@ WHL_DIR=$(dirname "${WHL_PATH}") echo "Size of the PIP wheel file built: $(ls -l ${WHL_PATH} | awk '{print $5}')" # Build the other GPU package. -if [ "$BUILD_BOTH_GPU_PACKAGES" -eq "1" ]; then - echo "====================================="\ - "Building the other GPU pip package." +if [[ "$BUILD_BOTH_GPU_PACKAGES" -eq "1" ]] || [[ "$BUILD_BOTH_CPU_PACKAGES" -eq "1" ]]; then + + if [[ "$BUILD_BOTH_GPU_PACKAGES" -eq "1" ]] && [[ "$BUILD_BOTH_CPU_PACKAGES" -eq "1" ]]; then + die "ERROR: TF_BUILD_BOTH_GPU_PACKAGES and TF_BUILD_BOTH_GPU_PACKAGES cannot both be set. No additional package will be built." + fi + + echo "=====================================" + if [[ "$BUILD_BOTH_GPU_PACKAGES" -eq "1" ]]; then + if ! [[ ${OS_TYPE} == "ubuntu" ]]; then + die "ERROR: pip_new.sh only support building both GPU wheels on ubuntu." + fi + "Building the other GPU pip package." + PROJECT_SUFFIX="gpu" + else + if ! [[ ${OS_TYPE} == "macos" ]]; then + die "ERROR: pip_new.sh only support building both CPU wheels on macos." + fi + "Building the other CPU pip package." + PROJECT_SUFFIX="cpu" + fi + # Check container type - if ! [[ ${CONTAINER_TYPE} == "gpu" ]]; then - die "Error: CONTAINER_TYPE needs to be `GPU` to build GPU packages. Got "\ + if ! 
[[ ${CONTAINER_TYPE} == ${PROJECT_SUFFIX} ]]; then + die "Error: CONTAINER_TYPE needs to be \"${PROJECT_SUFFIX}\" to build ${PROJECT_SUFFIX} packages. Got"\ "\"${CONTAINER_TYPE}\" instead." fi - if [[ "$PROJECT_NAME" == *_gpu ]]; then - NEW_PROJECT_NAME=${PROJECT_NAME%"_gpu"} + if [[ "$PROJECT_NAME" == *_${PROJECT_SUFFIX} ]]; then + NEW_PROJECT_NAME=${PROJECT_NAME%"_${PROJECT_SUFFIX}"} else - NEW_PROJECT_NAME="${PROJECT_NAME}_gpu" + NEW_PROJECT_NAME="${PROJECT_NAME}_${PROJECT_SUFFIX}" fi - echo "The given gpu \$PROJECT_NAME is ${PROJECT_NAME}. The additional GPU "\ + echo "The given ${PROJECT_SUFFIX} \$PROJECT_NAME is ${PROJECT_NAME}. The additional ${PROJECT_SUFFIX}"\ "pip package will have project name ${NEW_PROJECT_NAME}." ./bazel-bin/tensorflow/tools/pip_package/build_pip_package ${PIP_WHL_DIR} ${GPU_FLAG} ${NIGHTLY_FLAG} "--project_name" ${NEW_PROJECT_NAME} || die "build_pip_package FAILED" diff --git a/tensorflow/tools/ci_build/release/macos/cpu_py2_full/pip.sh b/tensorflow/tools/ci_build/release/macos/cpu_py2_full/pip.sh index 98c241f0751ce4..3744559a988429 100644 --- a/tensorflow/tools/ci_build/release/macos/cpu_py2_full/pip.sh +++ b/tensorflow/tools/ci_build/release/macos/cpu_py2_full/pip.sh @@ -30,10 +30,11 @@ sudo xcode-select -s "${DEVELOPER_DIR}" # Install macos pip dependencies install_macos_pip_deps sudo -# Export required variables for running pip.sh +# Export required variables for running pip_new.sh export OS_TYPE="MACOS" export CONTAINER_TYPE="CPU" export TF_PYTHON_VERSION='python2' +export TF_BUILD_BOTH_CPU_PACKAGES=1 # Run configure. 
export TF_NEED_CUDA=0 diff --git a/tensorflow/tools/ci_build/release/macos/cpu_py35_full/pip.sh b/tensorflow/tools/ci_build/release/macos/cpu_py35_full/pip.sh index a1cbfde33fe78a..4559c1896164eb 100644 --- a/tensorflow/tools/ci_build/release/macos/cpu_py35_full/pip.sh +++ b/tensorflow/tools/ci_build/release/macos/cpu_py35_full/pip.sh @@ -30,10 +30,11 @@ sudo xcode-select -s "${DEVELOPER_DIR}" # Install macos pip dependencies install_macos_pip_deps sudo pip3.5 -# Export required variables for running pip.sh +# Export required variables for running pip_new.sh export OS_TYPE="MACOS" export CONTAINER_TYPE="CPU" export TF_PYTHON_VERSION='python3.5' +export TF_BUILD_BOTH_CPU_PACKAGES=1 # Run configure. export TF_NEED_CUDA=0 diff --git a/tensorflow/tools/ci_build/release/macos/cpu_py36_full/pip.sh b/tensorflow/tools/ci_build/release/macos/cpu_py36_full/pip.sh index d97fbf80f9417d..0ae2c3b4069667 100644 --- a/tensorflow/tools/ci_build/release/macos/cpu_py36_full/pip.sh +++ b/tensorflow/tools/ci_build/release/macos/cpu_py36_full/pip.sh @@ -30,10 +30,11 @@ sudo xcode-select -s "${DEVELOPER_DIR}" # Install macos pip dependencies install_macos_pip_deps sudo pip3.6 -# Export required variables for running pip.sh +# Export required variables for running pip_new.sh export OS_TYPE="MACOS" export CONTAINER_TYPE="CPU" export TF_PYTHON_VERSION='python3.6' +export TF_BUILD_BOTH_CPU_PACKAGES=1 # Run configure. 
export TF_NEED_CUDA=0 diff --git a/tensorflow/tools/ci_build/release/macos/cpu_py37_full/pip.sh b/tensorflow/tools/ci_build/release/macos/cpu_py37_full/pip.sh index 84f8b05818f4bd..2d5fb071913aff 100644 --- a/tensorflow/tools/ci_build/release/macos/cpu_py37_full/pip.sh +++ b/tensorflow/tools/ci_build/release/macos/cpu_py37_full/pip.sh @@ -30,10 +30,11 @@ sudo xcode-select -s "${DEVELOPER_DIR}" # Install macos pip dependencies install_macos_pip_deps sudo pip3.7 -# Export required variables for running pip.sh +# Export required variables for running pip_new.sh export OS_TYPE="MACOS" export CONTAINER_TYPE="CPU" export TF_PYTHON_VERSION='python3.7' +export TF_BUILD_BOTH_CPU_PACKAGES=1 # Run configure. export TF_NEED_CUDA=0 From 2d398250645908f28dffbb4028d83d6f0f34bb2b Mon Sep 17 00:00:00 2001 From: rxsang Date: Thu, 21 Nov 2019 10:03:15 -0800 Subject: [PATCH 038/130] Fix nested function inside XLA context. --- tensorflow/python/eager/function.py | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py index 6cc2a5c0573243..9fdc282105b7d9 100644 --- a/tensorflow/python/eager/function.py +++ b/tensorflow/python/eager/function.py @@ -2510,7 +2510,14 @@ def _cache_key(self, args, kwargs, include_tensor_ranks_only=False): # already. executing_eagerly = ctx.executing_eagerly() parent_graph = None + xla_context_id = 0 if not executing_eagerly: + # We want to force function retracing for each different + # XLAControlFlowContext, so add `xla_context_id` to the cache key. + tpu_context = _enclosing_xla_context() + if tpu_context is not None: + xla_context_id = id(tpu_context) + with ops.init_scope(): # The graph, or whether we're executing eagerly, should be a part of the # cache key so we don't improperly capture tensors such as variables. 
@@ -2533,10 +2540,6 @@ def _cache_key(self, args, kwargs, include_tensor_ranks_only=False): device_functions = (pydev.merge_device(ctx.device_name),) else: device_functions = () - - # We should not be in XLA context in eager mode. So always set - # `xla_context_id` to 0. - xla_context_id = 0 else: colocation_stack = tuple(default_graph._colocation_stack.peek_objs()) if (uses_distribution_strategy @@ -2547,14 +2550,6 @@ def _cache_key(self, args, kwargs, include_tensor_ranks_only=False): device_functions = tuple(default_graph._device_functions_outer_to_inner) else: device_functions = () - - # We want to force function retracing for each different - # XLAControlFlowContext, so add `xla_context_id` to the cache key. - tpu_context = _enclosing_xla_context() - if tpu_context is not None: - xla_context_id = id(tpu_context) - else: - xla_context_id = 0 in_cross_replica_context = False try: From 46d241933fedeedcec8499c8217503ef8a4220b3 Mon Sep 17 00:00:00 2001 From: Hye Soo Yang Date: Fri, 8 Nov 2019 16:41:55 -0800 Subject: [PATCH 039/130] Change default CUDA version on Windows to 10.1. PiperOrigin-RevId: 279416693 Change-Id: Ie818bf80bfc190e3ef1c0b14f2e57ea8f899a4d0 --- tensorflow/tools/ci_build/release/common_win.bat | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/tools/ci_build/release/common_win.bat b/tensorflow/tools/ci_build/release/common_win.bat index ad23e3a1ab3475..95b09008c542e0 100644 --- a/tensorflow/tools/ci_build/release/common_win.bat +++ b/tensorflow/tools/ci_build/release/common_win.bat @@ -56,7 +56,7 @@ IF "%PYTHON_DIRECTORY%"=="Python37" ( :: Set cuda related environment variables. If we are not using CUDA, these are not used. 
IF NOT DEFINED TF_CUDA_VERSION ( - SET TF_CUDA_VERSION=10.0 + SET TF_CUDA_VERSION=10.1 ) SET TF_CUDNN_VERSION=7 SET TF_CUDA_COMPUTE_CAPABILITIES=3.5,3.7,5.2,6.0,6.1,7.0 From 711c179522916542d98347deb77663b864409544 Mon Sep 17 00:00:00 2001 From: Rachel Lim Date: Thu, 21 Nov 2019 10:09:43 -0800 Subject: [PATCH 040/130] [tf.data] Fix OOM when tf.data map_and_batch is used with num_parallel_calls = autotune, batch_size = 1. Closes #33516. PiperOrigin-RevId: 281775472 Change-Id: Ie10cea0ef1515d5aff8e3dddadc069ddee1a5a76 --- tensorflow/core/kernels/data/experimental/BUILD | 1 + .../data/experimental/map_and_batch_dataset_op.cc | 10 +++++++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/kernels/data/experimental/BUILD b/tensorflow/core/kernels/data/experimental/BUILD index 9cd0e926ea06ed..961d6d52cf1312 100644 --- a/tensorflow/core/kernels/data/experimental/BUILD +++ b/tensorflow/core/kernels/data/experimental/BUILD @@ -184,6 +184,7 @@ tf_kernel_library( "//tensorflow/core:core_cpu_internal", "//tensorflow/core:dataset_ops_op_lib", "//tensorflow/core:framework", + "//tensorflow/core:framework_internal", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", "//tensorflow/core:nn_ops_op_lib", diff --git a/tensorflow/core/kernels/data/experimental/map_and_batch_dataset_op.cc b/tensorflow/core/kernels/data/experimental/map_and_batch_dataset_op.cc index f765cffcd90d14..6fbf153e9d1a67 100644 --- a/tensorflow/core/kernels/data/experimental/map_and_batch_dataset_op.cc +++ b/tensorflow/core/kernels/data/experimental/map_and_batch_dataset_op.cc @@ -20,6 +20,7 @@ limitations under the License. 
#include "tensorflow/core/common_runtime/function.h" #include "tensorflow/core/common_runtime/input_colocation_exemption_registry.h" #include "tensorflow/core/common_runtime/metrics.h" +#include "tensorflow/core/framework/model.h" #include "tensorflow/core/framework/partial_tensor_shape.h" #include "tensorflow/core/framework/stats_aggregator.h" #include "tensorflow/core/framework/tensor.h" @@ -170,9 +171,12 @@ class MapAndBatchDatasetOp::Dataset : public DatasetBase { num_parallel_calls_(std::make_shared( params.dataset->num_parallel_calls_, mu_, cond_var_)), max_batch_results_( - std::min(kMaxBatchResults, (params.dataset->num_parallel_calls_ + - params.dataset->batch_size_ - 1) / - params.dataset->batch_size_)) {} + params.dataset->num_parallel_calls_ == model::kAutotune + ? kMaxBatchResults + : std::min(kMaxBatchResults, + (params.dataset->num_parallel_calls_ + + params.dataset->batch_size_ - 1) / + params.dataset->batch_size_)) {} ~Iterator() override { mutex_lock l(*mu_); From 3be306cfd9ab87d2d49edd3d6d02e9d6e82312e2 Mon Sep 17 00:00:00 2001 From: Yifei Feng Date: Thu, 21 Nov 2019 11:04:42 -0800 Subject: [PATCH 041/130] Make docker has the same bazel version as the env that invokes it. 
PiperOrigin-RevId: 281789221 Change-Id: I6b2ebbe4bf787bb2e591905c8e5368cfac793e0e --- tensorflow/tools/ci_build/builds/docker_cpu_pip.sh | 4 ++++ .../tools/ci_build/release/ubuntu_16/gpu_pip_on_cpu/build.sh | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/tensorflow/tools/ci_build/builds/docker_cpu_pip.sh b/tensorflow/tools/ci_build/builds/docker_cpu_pip.sh index c87ec292471064..9ea828a9a98721 100755 --- a/tensorflow/tools/ci_build/builds/docker_cpu_pip.sh +++ b/tensorflow/tools/ci_build/builds/docker_cpu_pip.sh @@ -22,6 +22,10 @@ pip --version pip install portpicker pip install *.whl +# Make bazel version the same as the env that invokes this script +source tensorflow/tools/ci_build/release/common.sh +update_bazel_linux ${BAZEL_VERSION} + # Use default configuration yes "" | python configure.py diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_pip_on_cpu/build.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_pip_on_cpu/build.sh index 4b619aa7c540cd..d6c2df745e1f26 100755 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_pip_on_cpu/build.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_pip_on_cpu/build.sh @@ -53,4 +53,4 @@ WHL_PATH=$(ls "${PIP_WHL_DIR}"/*.whl) cp "${WHL_PATH}" "$(pwd)"/. 
chmod +x tensorflow/tools/ci_build/builds/docker_cpu_pip.sh -docker run -e "CI_BUILD_USER=$(id -u -n)" -e "CI_BUILD_UID=$(id -u)" -e "CI_BUILD_GROUP=$(id -g -n)" -e "CI_BUILD_GID=$(id -g)" -e "CI_BUILD_HOME=/bazel_pip" -v "$(pwd)":/bazel_pip tensorflow/tensorflow:devel-py3 "./bazel_pip/tensorflow/tools/ci_build/builds/with_the_same_user" "./bazel_pip/tensorflow/tools/ci_build/builds/docker_cpu_pip.sh" +docker run -e "BAZEL_VERSION=${BAZEL_VERSION}" -e "CI_BUILD_USER=$(id -u -n)" -e "CI_BUILD_UID=$(id -u)" -e "CI_BUILD_GROUP=$(id -g -n)" -e "CI_BUILD_GID=$(id -g)" -e "CI_BUILD_HOME=/bazel_pip" -v "$(pwd)":/bazel_pip tensorflow/tensorflow:devel-py3 "./bazel_pip/tensorflow/tools/ci_build/builds/with_the_same_user" "./bazel_pip/tensorflow/tools/ci_build/builds/docker_cpu_pip.sh" From 97fbcf8c44bb96d3eb516460416018f6bf71ee41 Mon Sep 17 00:00:00 2001 From: Yifei Feng Date: Thu, 21 Nov 2019 14:52:00 -0800 Subject: [PATCH 042/130] Do not use common script to install bazel the paths in docker image are different. 
PiperOrigin-RevId: 281837917 Change-Id: Id62738e214ef66ffd17226bc0c90ae9912fd9bf9 --- tensorflow/tools/ci_build/builds/docker_cpu_pip.sh | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/tensorflow/tools/ci_build/builds/docker_cpu_pip.sh b/tensorflow/tools/ci_build/builds/docker_cpu_pip.sh index 9ea828a9a98721..3bb8d8b7afa30e 100755 --- a/tensorflow/tools/ci_build/builds/docker_cpu_pip.sh +++ b/tensorflow/tools/ci_build/builds/docker_cpu_pip.sh @@ -23,8 +23,16 @@ pip install portpicker pip install *.whl # Make bazel version the same as the env that invokes this script -source tensorflow/tools/ci_build/release/common.sh -update_bazel_linux ${BAZEL_VERSION} +rm -rf ~/bazel +mkdir ~/bazel +pushd ~/bazel +wget https://github.com/bazelbuild/bazel/releases/download/"${BAZEL_VERSION}"/bazel-"${BAZEL_VERSION}"-installer-linux-x86_64.sh +chmod +x bazel-*.sh +./bazel-"${BAZEL_VERSION}"-installer-linux-x86_64.sh --user +rm bazel-"${BAZEL_VERSION}"-installer-linux-x86_64.sh +PATH="/bazel_pip/bin:$PATH" +popd +bazel version # Use default configuration yes "" | python configure.py From 73501b7ceb072b104f99d9ab3115572955454a4d Mon Sep 17 00:00:00 2001 From: Yifei Feng Date: Thu, 21 Nov 2019 14:53:05 -0800 Subject: [PATCH 043/130] add missing "echo". PiperOrigin-RevId: 281838161 Change-Id: I396accac7be4e74f8afa00abb554d5e504e0b4f2 --- tensorflow/tools/ci_build/builds/pip_new.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/tools/ci_build/builds/pip_new.sh b/tensorflow/tools/ci_build/builds/pip_new.sh index ace5c014865fc7..2559dacd91551a 100755 --- a/tensorflow/tools/ci_build/builds/pip_new.sh +++ b/tensorflow/tools/ci_build/builds/pip_new.sh @@ -659,13 +659,13 @@ if [[ "$BUILD_BOTH_GPU_PACKAGES" -eq "1" ]] || [[ "$BUILD_BOTH_CPU_PACKAGES" -eq if ! [[ ${OS_TYPE} == "ubuntu" ]]; then die "ERROR: pip_new.sh only support building both GPU wheels on ubuntu." fi - "Building the other GPU pip package." 
+ echo "Building the other GPU pip package." PROJECT_SUFFIX="gpu" else if ! [[ ${OS_TYPE} == "macos" ]]; then die "ERROR: pip_new.sh only support building both CPU wheels on macos." fi - "Building the other CPU pip package." + echo "Building the other CPU pip package." PROJECT_SUFFIX="cpu" fi From 98e4379f7e80178374227636a4e695e79988ad18 Mon Sep 17 00:00:00 2001 From: Rachel Lim Date: Thu, 21 Nov 2019 15:51:57 -0800 Subject: [PATCH 044/130] [tf.data] Fix deadlock with Prefetch+ParallelMap PiperOrigin-RevId: 281851149 Change-Id: I1b776edb68b45eabc9a0e931135470cae1b6e8f1 --- .../core/kernels/data/prefetch_dataset_op.cc | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/tensorflow/core/kernels/data/prefetch_dataset_op.cc b/tensorflow/core/kernels/data/prefetch_dataset_op.cc index 618e2b17c94f4c..097f3cdc688e87 100644 --- a/tensorflow/core/kernels/data/prefetch_dataset_op.cc +++ b/tensorflow/core/kernels/data/prefetch_dataset_op.cc @@ -208,14 +208,17 @@ class PrefetchDatasetOp::Dataset : public DatasetBase { } mutex_lock parent_l(*parent_mu_); - mutex_lock l(*mu_); - if (stats_aggregator) { - stats_aggregator->AddScalar( - stats_utils::BufferSizeScalarName(dataset()->node_name()), - static_cast(buffer_.size()), num_elements()); - stats_aggregator->AddScalar( - stats_utils::BufferCapacityScalarName(dataset()->node_name()), - static_cast(buffer_limit()), num_elements()); + { + mutex_lock l(*mu_); + if (stats_aggregator) { + stats_aggregator->AddScalar( + stats_utils::BufferSizeScalarName(dataset()->node_name()), + static_cast(buffer_.size()), num_elements()); + stats_aggregator->AddScalar( + stats_utils::BufferCapacityScalarName(dataset()->node_name()), + static_cast(buffer_limit()), num_elements()); + } + // Release mu_ } return input_impl_->GetNext(ctx, out_tensors, end_of_sequence); } @@ -477,6 +480,7 @@ class PrefetchDatasetOp::Dataset : public DatasetBase { // This mutex is used to ensure exclusivity between multiple threads // 
reading/writing this iterator's local state. + // Note: We should never call GetNext on the input while holding this. const std::shared_ptr mu_; // This mutex is used to ensure exclusivity between multiple threads // accessing the parent iterator. We keep this separate from `mu_` to From fbbf05d2a1358d947c617dc13dc2e1c144821954 Mon Sep 17 00:00:00 2001 From: Yuefeng Zhou Date: Thu, 21 Nov 2019 14:30:26 -0800 Subject: [PATCH 045/130] Allow evaluator not in cluster_spec, to be consistent with legacy Estimator. PiperOrigin-RevId: 281833366 Change-Id: Ic580172ba5ec038e246028031ec277b18f31ea56 --- .../python/distribute/multi_worker_util.py | 10 +++++--- .../distribute/multi_worker_util_test.py | 25 +++++++++++++++++++ 2 files changed, 32 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/distribute/multi_worker_util.py b/tensorflow/python/distribute/multi_worker_util.py index c804ed9b8bcada..4d89b2fab08520 100644 --- a/tensorflow/python/distribute/multi_worker_util.py +++ b/tensorflow/python/distribute/multi_worker_util.py @@ -53,7 +53,10 @@ def _validate_cluster_spec(cluster_spec, task_type, task_id): It checks: 0) None of `cluster_spec`, `task_type`, and `task_id` is `None`. 1) task type is one of "chief", "worker" or "evaluator". - 2) whether there is such a task type as `task_type` in the `cluster_spec`. + 2) whether there is such a task type as `task_type` in the `cluster_spec`. The + only exception is `evaluator`. In other words, it is still a valid + configuration when `task_type` is `evaluator` but it doesn't appear in + `cluster_spec`. This is to be compatible with `TF_CONFIG` in Estimator. 3) whether there is at most one "chief" job. 4) whether there is at most one "evaluator" job. 5) whether the `task_id` is smaller than the number of tasks for that @@ -76,7 +79,7 @@ def _validate_cluster_spec(cluster_spec, task_type, task_id): "Unrecognized task_type: %r, valid task types are: \"chief\", " "\"worker\", \"evaluator\" and \"ps\"." 
% task_type) - if task_type and task_type not in cluster_spec: + if task_type and task_type not in cluster_spec and task_type != "evaluator": raise ValueError("`task_type` %r not found in cluster_spec." % task_type) if len(cluster_spec.get("chief", [])) > 1: @@ -85,7 +88,8 @@ def _validate_cluster_spec(cluster_spec, task_type, task_id): if len(cluster_spec.get("evaluator", [])) > 1: raise ValueError("There must be at most one 'evaluator' job.") - if task_id >= len(cluster_spec[task_type]): + # The `evaluator` job is allowed to be missing in `cluster_spec`. + if task_type in cluster_spec and task_id >= len(cluster_spec[task_type]): raise ValueError( "The `task_id` %d exceeds the maximum id of %s." % (task_id, task_type)) diff --git a/tensorflow/python/distribute/multi_worker_util_test.py b/tensorflow/python/distribute/multi_worker_util_test.py index dbe57b24e08f34..6a51e71ded77af 100644 --- a/tensorflow/python/distribute/multi_worker_util_test.py +++ b/tensorflow/python/distribute/multi_worker_util_test.py @@ -237,5 +237,30 @@ def testLocalLeader(self): multi_worker_util.collective_leader(cluster_spec, None, 0), "") +# Most of the validation logic is tested by above tests except for some. 
+class ClusterSpecValidationTest(test.TestCase): + + def testEvaluatorNotInCluster(self): + cluster_spec = { + "chief": ["127.0.0.1:1234"], + "worker": ["127.0.0.1:8964", "127.0.0.1:2333"], + "ps": ["127.0.0.1:1926", "127.0.0.1:3141"] + } + multi_worker_util._validate_cluster_spec(cluster_spec, "chief", 0) + multi_worker_util._validate_cluster_spec(cluster_spec, "worker", 0) + multi_worker_util._validate_cluster_spec(cluster_spec, "ps", 0) + multi_worker_util._validate_cluster_spec(cluster_spec, "evaluator", 0) + + def testWorkerNotInCluster(self): + cluster_spec = { + "chief": ["127.0.0.1:1234"], + "ps": ["127.0.0.1:1926", "127.0.0.1:3141"] + } + multi_worker_util._validate_cluster_spec(cluster_spec, "evaluator", 0) + with self.assertRaisesRegexp( + ValueError, "`task_type` 'worker' not found in cluster_spec."): + multi_worker_util._validate_cluster_spec(cluster_spec, "worker", 0) + + if __name__ == "__main__": test.main() From 3a723696686cb9e0ca7e6bf3c68b2bd6ca90e8fc Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 22 Nov 2019 15:02:00 -0800 Subject: [PATCH 046/130] Override EIGEN strong inline for release builds as well. PiperOrigin-RevId: 282049163 Change-Id: I2fefa6ed9198aee013ce3bdc07d21dfb127833a7 --- tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh | 2 +- tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh b/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh index 1e825580071f0c..5f949f2bc7e9e6 100644 --- a/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh +++ b/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh @@ -104,7 +104,7 @@ if [[ "$RELEASE_BUILD" == 1 ]]; then # Overriding eigen strong inline speeds up the compiling of conv_grad_ops_3d.cc and conv_ops_3d.cc # by 20 minutes. 
See https://github.com/tensorflow/tensorflow/issues/10521 # Because this hurts the performance of TF, we don't override it in release build. - export TF_OVERRIDE_EIGEN_STRONG_INLINE=0 + export TF_OVERRIDE_EIGEN_STRONG_INLINE=1 else export TF_OVERRIDE_EIGEN_STRONG_INLINE=1 fi diff --git a/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh b/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh index 99fa086a025e9a..fd06af5b80541d 100644 --- a/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh +++ b/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh @@ -104,7 +104,7 @@ if [[ "$RELEASE_BUILD" == 1 ]]; then # Overriding eigen strong inline speeds up the compiling of conv_grad_ops_3d.cc and conv_ops_3d.cc # by 20 minutes. See https://github.com/tensorflow/tensorflow/issues/10521 # Because this hurts the performance of TF, we don't override it in release build. - export TF_OVERRIDE_EIGEN_STRONG_INLINE=0 + export TF_OVERRIDE_EIGEN_STRONG_INLINE=1 else export TF_OVERRIDE_EIGEN_STRONG_INLINE=1 fi From a8249385daf9418a809027b6b5bd3dad4ea37f49 Mon Sep 17 00:00:00 2001 From: geetachavan1 <53313357+geetachavan1@users.noreply.github.com> Date: Mon, 25 Nov 2019 10:59:47 -0800 Subject: [PATCH 047/130] Update RELEASE.md --- RELEASE.md | 90 +++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 89 insertions(+), 1 deletion(-) diff --git a/RELEASE.md b/RELEASE.md index 52fae8f489cd45..0e77c02a8e15c2 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -1,6 +1,94 @@ # Release 2.1.0 - +## Major Features and Improvements +* `Python packages and versions` + * The TensorFlow 2.1.0 release will be the last TF release supporting Python 2 binaries. Python 2 support officially stops an January 1, 2020 as announced [here](https://www.python.org/dev/peps/pep-0373/#update). + * The `tensorflow` pip package now includes GPU support by default (same as `tensorflow-gpu`) for both Linux and Windows. This runs on machines with and without NVIDIA GPUs. 
`tensorflow-gpu` is still available, and CPU-only packages can be downloaded at `tensorflow-cpu` for users who are concerned about package size. +* `tf.keras` + * `Model.fit_generator`, `Model.evaluate_generator`, `Model.predict_generator`, `Model.train_on_batch`, `Model.test_on_batch`, and `Model.predict_on_batch` methods now respect the `run_eagerly` property, and will correctly run using tf.function by default. + * `Model.fit_generator`, `Model.evaluate_generator`, and `Model.predict_generator` are deprecated endpoints. They are subsumed by `Model.fit`, `Model.evaluate`, and `Model.predict` which now support generators and Sequences. + * Keras `.compile` `.fit` `.evaluate` and `.predict` are allowed to be outside of the DistributionStrategy scope, as long as the model was constructed inside of a scope. + * Keras `model.load_weights` now accepts `skip_mismatch` as an argument. This was available in external Keras, and has now been copied over to `tf.keras`. + * Introduced the `TextVectorization` layer, which takes as input raw strings and takes care of text standardization, tokenization, n-gram generation, and vocabulary indexing. See this [end-to-end text classification example](https://colab.research.google.com/drive/1RvCnR7h0_l4Ekn5vINWToI9TNJdpUZB3). + * Experimental support for Keras `.compile`, `.fit`, `.evaluate`, and `.predict` is available for Cloud TPU Pods. + * Automatic outside compilation is now enabled for Cloud TPUs. This allows `tf.summary` to be used more conveniently with Cloud TPUs. + * Dynamic batch sizes with DistributionStrategy and Keras are supported on Cloud TPUs. + * Experimental support for mixed precision is available on GPUs and Cloud TPUs. + * Keras reference implementations for many popular models are available in the TensorFlow [Model Garden](https://github.com/tensorflow/models/tree/master/official). +* `tf.data` + * Changes rebatching for `tf.data datasets` + distribution strategies for better performance. 
Note that the dataset also behaves slightly differently, in that the rebatched dataset cardinality will always be a multiple of the number of replicas. +* `TensorRT` + * [TensorRT 6.0](https://developer.nvidia.com/tensorrt#tensorrt-whats-new) is now supported and enabled by default. This adds support for more TensorFlow ops including Conv3D, Conv3DBackpropInputV2, AvgPool3D, MaxPool3D, ResizeBilinear, and ResizeNearestNeighbor. In addition, the TensorFlow-TensorRT python conversion API is exported as `tf.experimental.tensorrt.Converter`. + +## Known issues +Because of [issues with building on windows](https://github.com/tensorflow/tensorflow/issues/10521), we turned off eigen strong inlining for the Windows builds. Windows binaries are expected to be slightly slower until the build issues are resolved. + +## Breaking Changes +* Deletes `Operation.traceback_with_start_lines` for which we know of no usages. +* Removed `id` from `tf.Tensor.__repr__()` as `id` is not useful other than internal debugging. +* Some `tf.assert_*` methods now raise assertions at operation creation time (i.e. when this Python line executes) if the input tensors' values are known at that time, not during the `session.run()`. When this happens, a noop is returned and the input tensors are marked non-feedable. In other words, if they are used as keys in `feed_dict` argument to `session.run()`, an error will be raised. Also, because some assert ops don't make it into the graph, the graph structure changes. A different graph can result in different per-op random seeds when they are not given explicitly (most often). +* The following APIs are not longer experimental: `tf.config.list_logical_devices`, `tf.config.list_physical_devices`, `tf.config.get_visible_devices`, `tf.config.set_visible_devices`, `tf.config.get_logical_device_configuration`, `tf.config.set_logical_device_configuration`. 
+* `tf.config.experimentalVirtualDeviceConfiguration` has been renamed to `tf.config.LogicalDeviceConfiguration`. +* `tf.config.experimental_list_devices` has been removed, please use +`tf.config.list_logical_devices`. + +## Bug Fixes and Other Changes +* `tf.data` + * Fixes concurrency issue with `tf.data.experimental.parallel_interleave` with sloppy=True. + * Add `tf.data.experimental.dense_to_ragged_batch()`. + * Extend `tf.data` parsing ops to support `RaggedTensors`. +* `tf.distribute` + * Fix issue where GRU would crash or give incorrect output when a `tf.distribute.Strategy` was used. +* `tf.estimator` + * Added option in `tf.estimator.CheckpointSaverHook` to not save the `GraphDef`. +* `tf.keras` + * Export depthwise_conv2d in `tf.keras.backend`. + * In Keras Layers and Models, Variables in `trainable_weights`, `non_trainable_weights`, and `weights` are explicitly deduplicated. + * Fix the incorrect stateful behavior of Keras convolutional layers. +* `tf.lite` + * Legalization for `NMS` ops in TFLite. + * add `narrow_range` and `axis` to `quantize_v2` and `dequantize` ops. + * Added support for `FusedBatchNormV3` in converter. + * Add an `errno`-like field to `NNAPI` delegate for detecting `NNAPI` errors for fallback behaviour. + * Refactors `NNAPI` Delegate to support detailed reason why an operation is not accelerated. + * Converts hardswish subgraphs into atomic ops. +* Other + * Add `RaggedTensor.merge_dims()`. + * Added new `uniform_row_length` row-partitioning tensor to `RaggedTensor`. + * Add `shape` arg to `RaggedTensor.to_tensor`; Improve speed of `RaggedTensor.to_tensor`. + * `tf.io.parse_sequence_example` and `tf.io.parse_single_sequence_example` now support ragged features. + * Fix `while_v2` with variables in custom gradient. + * Support taking gradients of V2 `tf.cond` and `tf.while_loop` using `LookupTable`. + * Fix bug where `vectorized_map` failed on inputs with unknown static shape. + * Add preliminary support for sparse CSR matrices. 
+ * Tensor equality with `None` now behaves as expected. + * Make calls to `tf.function(f)()`, `tf.function(f).get_concrete_function` and `tf.function(f).get_initialization_function` thread-safe. + * Add `tf.debugging.enable_check_numerics()` and `tf.debugging.disable_check_numerics()` to facilitate debugging of numeric instability (`Infinity`s and `NaN`s) under eager mode and `tf.function`s. + * Extend `tf.identity` to work with CompositeTensors (such as SparseTensor) + * Added more `dtypes` and zero-sized inputs to `Einsum` Op and improved its performance + * Enable multi-worker `NCCL` `all-reduce` inside functions executing eagerly. + * Added complex128 support to `RFFT`, `RFFT2D`, `RFFT3D`, `IRFFT`, `IRFFT2D`, and `IRFFT3D`. + * Add `pfor` converter for `SelfAdjointEigV2`. + * Add `tf.math.ndtri` and `tf.math.erfinv`. + * Add `tf.config.experimental.enable_mlir_bridge` to allow using MLIR compiler bridge in eager model. + * Added support for MatrixSolve on Cloud TPU / XLA. + * Added `tf.autodiff.ForwardAccumulator` for forward-mode autodiff + * Add `LinearOperatorPermutation`. + * A few performance optimizations on `tf.reduce_logsumexp`. + * Added multilabel handling to `AUC` metric + * Optimization on `zeros_like`. + * Dimension constructor now requires `None` or types with an `__index__` method. + * Add `tf random.uniform` microbenchmark. + * Use `_protogen` suffix for proto library targets instead of `_cc_protogen` suffix. + * Moving the checkpoint reader from `swig` to `pybind11`. + * tf.device & MirroredStrategy now supports passing in a tf.config.LogicalDevice + * If you're building Tensorflow from source, consider using [bazelisk](https://github.com/bazelisk/bazel) to automatically download and use the correct Bazel version. Bazelisk reads the `.bazelversion` file at the root of the project directory. 
+ +## Thanks to our Contributors + +This release contains contributions from many people at Google, as well as: + +8bitmp3, Aaron Ma, AbdüLhamit Yilmaz, Abhai Kollara, aflc, Ag Ramesh, Albert Z. Guo, Alex Torres, amoitra, Andrii Prymostka, angeliand, Anshuman Tripathy, Anthony Barbier, Anton Kachatkou, Anubh-V, Anuja Jakhade, Artem Ryabov, autoih, Bairen Yi, Bas Aarts, Basit Ayantunde, Ben Barsdell, Bhavani Subramanian, Brett Koonce, candy.dc, Captain-Pool, caster, cathy, Chong Yan, Choong Yin Thong, Clayne Robison, Colle, Dan Ganea, David Norman, David Refaeli, dengziming, Diego Caballero, Divyanshu, djshen, Douman, Duncan Riach, EFanZh, Elena Zhelezina, Eric Schweitz, Evgenii Zheltonozhskii, Fei Hu, fo40225, Fred Reiss, Frederic Bastien, Fredrik Knutsson, fsx950223, fwcore, George Grzegorz Pawelczak, George Sterpu, Gian Marco Iodice, Giorgio Arena, giuros01, Gomathi Ramamurthy, Guozhong Zhuang, Haifeng Jin, Haoyu Wu, HarikrishnanBalagopal, HJYOO, Huang Chen-Yi, Ilham Firdausi Putra, Imran Salam, Jared Nielsen, Jason Zaman, Jasper Vicenti, Jeff Daily, Jeff Poznanovic, Jens Elofsson, Jerry Shih, jerryyin, Jesper Dramsch, jim.meyer, Jongwon Lee, Jun Wan, Junyuan Xie, Kaixi Hou, kamalkraj, Kan Chen, Karthik Muthuraman, Keiji Ariyama, Kevin Rose, Kevin Wang, Koan-Sin Tan, kstuedem, Kwabena W. 
Agyeman, Lakshay Tokas, latyas, Leslie-Fang-Intel, Li, Guizi, Luciano Resende, Lukas Folle, Lukas Geiger, Mahmoud Abuzaina, Manuel Freiberger, Mark Ryan, Martin Mlostek, Masaki Kozuki, Matthew Bentham, Matthew Denton, mbhuiyan, mdfaijul, Muhwan Kim, Nagy Mostafa, nammbash, Nathan Luehr, Nathan Wells, Niranjan Hasabnis, Oleksii Volkovskyi, Olivier Moindrot, olramde, Ouyang Jin, OverLordGoldDragon, Pallavi G, Paul Andrey, Paul Wais, pkanwar23, Pooya Davoodi, Prabindh Sundareson, Rajeshwar Reddy T, Ralovich, Kristof, Refraction-Ray, Richard Barnes, richardbrks, Robert Herbig, Romeo Kienzler, Ryan Mccormick, saishruthi, Saket Khandelwal, Sami Kama, Sana Damani, Satoshi Tanaka, Sergey Mironov, Sergii Khomenko, Shahid, Shawn Presser, ShengYang1, Siddhartha Bagaria, Simon Plovyt, skeydan, srinivasan.narayanamoorthy, Stephen Mugisha, sunway513, Takeshi Watanabe, Taylor Jakobson, TengLu, TheMindVirus, ThisIsIsaac, Tim Gates, Timothy Liu, Tomer Gafner, Trent Lo, Trevor Hickey, Trevor Morris, vcarpani, Wei Wang, Wen-Heng (Jack) Chung, wenshuai, Wenshuai-Xiaomi, wenxizhu, william, William D. Irons, Xinan Jiang, Yannic, Yasir Modak, Yasuhiro Matsumoto, Yong Tang, Yongfeng Gu, Youwei Song, Zaccharie Ramzi, Zhang, Zhenyu Guo, 王振华 (Zhenhua Wang), 韩董, 이중건 Isaac Lee # Release 1.15.0 This is the last 1.x release for TensorFlow. We do not expect to update the 1.x branch with features, although we will issue patch releases to fix vulnerabilities for at least one year. From 710eaa7d0edb00f4fd1a9d4cc496a5099231239d Mon Sep 17 00:00:00 2001 From: Martin Wicke <577277+martinwicke@users.noreply.github.com> Date: Mon, 25 Nov 2019 12:21:14 -0800 Subject: [PATCH 048/130] Update RELEASE.md --- RELEASE.md | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/RELEASE.md b/RELEASE.md index 0e77c02a8e15c2..4e9731f6eea75f 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -1,8 +1,9 @@ # Release 2.1.0 +TensorFlow 2.1 will be the last TF release supporting Python 2. 
Python 2 support [officially ends an January 1, 2020](https://www.python.org/dev/peps/pep-0373/#update). [As announced earlier](https://groups.google.com/a/tensorflow.org/d/msg/announce/gVwS5RC8mds/dCt1ka2XAAAJ), TensorFlow will also stop supporting Python 2 starting January 1, 2020, and no more releases are expected in 2019. + ## Major Features and Improvements * `Python packages and versions` - * The TensorFlow 2.1.0 release will be the last TF release supporting Python 2 binaries. Python 2 support officially stops an January 1, 2020 as announced [here](https://www.python.org/dev/peps/pep-0373/#update). * The `tensorflow` pip package now includes GPU support by default (same as `tensorflow-gpu`) for both Linux and Windows. This runs on machines with and without NVIDIA GPUs. `tensorflow-gpu` is still available, and CPU-only packages can be downloaded at `tensorflow-cpu` for users who are concerned about package size. * `tf.keras` * `Model.fit_generator`, `Model.evaluate_generator`, `Model.predict_generator`, `Model.train_on_batch`, `Model.test_on_batch`, and `Model.predict_on_batch` methods now respect the `run_eagerly` property, and will correctly run using tf.function by default. @@ -26,7 +27,7 @@ Because of [issues with building on windows](https://github.com/tensorflow/tenso ## Breaking Changes * Deletes `Operation.traceback_with_start_lines` for which we know of no usages. * Removed `id` from `tf.Tensor.__repr__()` as `id` is not useful other than internal debugging. -* Some `tf.assert_*` methods now raise assertions at operation creation time (i.e. when this Python line executes) if the input tensors' values are known at that time, not during the `session.run()`. When this happens, a noop is returned and the input tensors are marked non-feedable. In other words, if they are used as keys in `feed_dict` argument to `session.run()`, an error will be raised. Also, because some assert ops don't make it into the graph, the graph structure changes. 
A different graph can result in different per-op random seeds when they are not given explicitly (most often). +* Some `tf.assert_*` methods now raise assertions at operation creation time if the input tensors' values are known at that time, not during the `session.run()`. This only changes behavior when the graph execution would have resulted in an error. When this happens, a noop is returned and the input tensors are marked non-feedable. In other words, if they are used as keys in `feed_dict` argument to `session.run()`, an error will be raised. Also, because some assert ops don't make it into the graph, the graph structure changes. A different graph can result in different per-op random seeds when they are not given explicitly (most often). * The following APIs are not longer experimental: `tf.config.list_logical_devices`, `tf.config.list_physical_devices`, `tf.config.get_visible_devices`, `tf.config.set_visible_devices`, `tf.config.get_logical_device_configuration`, `tf.config.set_logical_device_configuration`. * `tf.config.experimentalVirtualDeviceConfiguration` has been renamed to `tf.config.LogicalDeviceConfiguration`. * `tf.config.experimental_list_devices` has been removed, please use @@ -78,10 +79,10 @@ Because of [issues with building on windows](https://github.com/tensorflow/tenso * Added multilabel handling to `AUC` metric * Optimization on `zeros_like`. * Dimension constructor now requires `None` or types with an `__index__` method. - * Add `tf random.uniform` microbenchmark. + * Add `tf.random.uniform` microbenchmark. * Use `_protogen` suffix for proto library targets instead of `_cc_protogen` suffix. * Moving the checkpoint reader from `swig` to `pybind11`. 
- * tf.device & MirroredStrategy now supports passing in a tf.config.LogicalDevice + * `tf.device` & `MirroredStrategy` now supports passing in a `tf.config.LogicalDevice` * If you're building Tensorflow from source, consider using [bazelisk](https://github.com/bazelisk/bazel) to automatically download and use the correct Bazel version. Bazelisk reads the `.bazelversion` file at the root of the project directory. ## Thanks to our Contributors From fee475d43d8fee01b0cbe1cab40e847b5d5b30eb Mon Sep 17 00:00:00 2001 From: Martin Wicke <577277+martinwicke@users.noreply.github.com> Date: Mon, 25 Nov 2019 12:23:41 -0800 Subject: [PATCH 049/130] Update RELEASE.md --- RELEASE.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/RELEASE.md b/RELEASE.md index 4e9731f6eea75f..7b4f0f29f1a609 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -3,8 +3,7 @@ TensorFlow 2.1 will be the last TF release supporting Python 2. Python 2 support [officially ends an January 1, 2020](https://www.python.org/dev/peps/pep-0373/#update). [As announced earlier](https://groups.google.com/a/tensorflow.org/d/msg/announce/gVwS5RC8mds/dCt1ka2XAAAJ), TensorFlow will also stop supporting Python 2 starting January 1, 2020, and no more releases are expected in 2019. ## Major Features and Improvements -* `Python packages and versions` - * The `tensorflow` pip package now includes GPU support by default (same as `tensorflow-gpu`) for both Linux and Windows. This runs on machines with and without NVIDIA GPUs. `tensorflow-gpu` is still available, and CPU-only packages can be downloaded at `tensorflow-cpu` for users who are concerned about package size. +* The `tensorflow` pip package now includes GPU support by default (same as `tensorflow-gpu`) for both Linux and Windows. This runs on machines with and without NVIDIA GPUs. `tensorflow-gpu` is still available, and CPU-only packages can be downloaded at `tensorflow-cpu` for users who are concerned about package size. 
* `tf.keras` * `Model.fit_generator`, `Model.evaluate_generator`, `Model.predict_generator`, `Model.train_on_batch`, `Model.test_on_batch`, and `Model.predict_on_batch` methods now respect the `run_eagerly` property, and will correctly run using tf.function by default. * `Model.fit_generator`, `Model.evaluate_generator`, and `Model.predict_generator` are deprecated endpoints. They are subsumed by `Model.fit`, `Model.evaluate`, and `Model.predict` which now support generators and Sequences. From 617f788da56c98c46ee346e8b2e7bf1e061bb3e1 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 22 Nov 2019 16:39:08 -0800 Subject: [PATCH 050/130] Allows Keras TensorLikeDataAdapter to handle pandas series and dataframes. PiperOrigin-RevId: 282067215 Change-Id: Ied0b0211ab38420639e00d2b03693c0330fffe8c --- .../python/keras/engine/data_adapter.py | 10 ++- .../python/keras/engine/data_adapter_test.py | 82 +++++++++++++++++++ 2 files changed, 91 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/keras/engine/data_adapter.py b/tensorflow/python/keras/engine/data_adapter.py index 3f2cc2eaa6509d..1474cf7a127cca 100644 --- a/tensorflow/python/keras/engine/data_adapter.py +++ b/tensorflow/python/keras/engine/data_adapter.py @@ -45,6 +45,10 @@ except ImportError: scipy_sparse = None +try: + import pandas as pd # pylint: disable=g-import-not-at-top +except ImportError: + pd = None try: # In Python2 unicode is a scalar type @@ -214,8 +218,12 @@ def can_handle(x, y=None): if y is not None: flat_inputs += nest.flatten(y) + tensor_types = (ops.Tensor, np.ndarray) + if pd: + tensor_types = (ops.Tensor, np.ndarray, pd.Series, pd.DataFrame) + def _is_tensor(v): - if isinstance(v, (ops.Tensor, np.ndarray)): + if isinstance(v, tensor_types): return True return False diff --git a/tensorflow/python/keras/engine/data_adapter_test.py b/tensorflow/python/keras/engine/data_adapter_test.py index bce5b923b92052..63d04d97ad6813 100644 --- 
a/tensorflow/python/keras/engine/data_adapter_test.py +++ b/tensorflow/python/keras/engine/data_adapter_test.py @@ -160,6 +160,88 @@ def test_training_numpy(self): run_eagerly=testing_utils.should_run_eagerly()) self.model.fit(self.numpy_input, self.numpy_target, batch_size=5) + def test_can_handle_pandas(self): + try: + import pandas as pd # pylint: disable=g-import-not-at-top + except ImportError: + self.skipTest('Skipping test because pandas is not installed.') + self.assertTrue(self.adapter_cls.can_handle(pd.DataFrame(self.numpy_input))) + self.assertTrue( + self.adapter_cls.can_handle(pd.DataFrame(self.numpy_input)[0])) + self.assertTrue( + self.adapter_cls.can_handle( + pd.DataFrame(self.numpy_input), + pd.DataFrame(self.numpy_input)[0])) + + @keras_parameterized.run_all_keras_modes(always_skip_v1=True) + def test_training_pandas(self): + try: + import pandas as pd # pylint: disable=g-import-not-at-top + except ImportError: + self.skipTest('Skipping test because pandas is not installed.') + input_a = keras.Input(shape=(3,), name='input_a') + input_b = keras.Input(shape=(3,), name='input_b') + input_c = keras.Input(shape=(1,), name='input_b') + + x = keras.layers.Dense(4, name='dense_1')(input_a) + y = keras.layers.Dense(3, name='dense_2')(input_b) + z = keras.layers.Dense(1, name='dense_3')(input_c) + + model_1 = keras.Model(inputs=input_a, outputs=x) + model_2 = keras.Model(inputs=[input_a, input_b], outputs=[x, y]) + model_3 = keras.Model(inputs=input_c, outputs=z) + + model_1.compile(optimizer='rmsprop', loss='mse') + model_2.compile(optimizer='rmsprop', loss='mse') + + input_a_np = np.random.random((10, 3)) + input_b_np = np.random.random((10, 3)) + input_a_df = pd.DataFrame(input_a_np) + input_b_df = pd.DataFrame(input_b_np) + + output_a_df = pd.DataFrame(np.random.random((10, 4))) + output_b_df = pd.DataFrame(np.random.random((10, 3))) + + model_1.fit(input_a_df, + output_a_df) + model_2.fit([input_a_df, input_b_df], + [output_a_df, output_b_df]) + 
model_1.fit([input_a_df], + [output_a_df]) + model_1.fit({'input_a': input_a_df}, + output_a_df) + model_2.fit({'input_a': input_a_df, 'input_b': input_b_df}, + [output_a_df, output_b_df]) + + model_1.evaluate(input_a_df, + output_a_df) + model_2.evaluate([input_a_df, input_b_df], + [output_a_df, output_b_df]) + model_1.evaluate([input_a_df], + [output_a_df]) + model_1.evaluate({'input_a': input_a_df}, + output_a_df) + model_2.evaluate({'input_a': input_a_df, 'input_b': input_b_df}, + [output_a_df, output_b_df]) + + # Verify predicting on pandas vs numpy returns the same result + predict_1_pandas = model_1.predict(input_a_df) + predict_2_pandas = model_2.predict([input_a_df, input_b_df]) + predict_3_pandas = model_3.predict(input_a_df[0]) + + predict_1_numpy = model_1.predict(input_a_np) + predict_2_numpy = model_2.predict([input_a_np, input_b_np]) + predict_3_numpy = model_3.predict(np.asarray(input_a_df[0])) + + self.assertAllClose(predict_1_numpy, predict_1_pandas) + self.assertAllClose(predict_2_numpy, predict_2_pandas) + self.assertAllClose(predict_3_numpy, predict_3_pandas) + + # Extra ways to pass in dataframes + model_1.predict([input_a_df]) + model_1.predict({'input_a': input_a_df}) + model_2.predict({'input_a': input_a_df, 'input_b': input_b_df}) + def test_can_handle(self): self.assertTrue(self.adapter_cls.can_handle(self.tensor_input)) self.assertTrue( From 058f14880166d1b6ee699b857751e7ffc6a6c599 Mon Sep 17 00:00:00 2001 From: Yifei Feng Date: Mon, 25 Nov 2019 13:46:08 -0800 Subject: [PATCH 051/130] Update windows release build script to create the correct pip package names. 
PiperOrigin-RevId: 282425067 Change-Id: I13b05c37eb9dbeac2d9beea31b21898f12cfdc89 --- .../ci_build/release/windows/cpu_py35_full/release.bat | 2 +- .../ci_build/release/windows/cpu_py36_full/release.bat | 2 +- .../ci_build/release/windows/cpu_py37_full/release.bat | 2 +- .../ci_build/release/windows/gpu_py35_full/nightly.bat | 5 ++++- .../release/windows/gpu_py35_full/release_pip_rename.sh | 4 ++-- .../ci_build/release/windows/gpu_py35_full/release_v1.bat | 2 +- .../ci_build/release/windows/gpu_py36_full/release.bat | 5 ++++- .../release/windows/gpu_py36_full/release_pip_rename.sh | 4 ++-- .../ci_build/release/windows/gpu_py36_full/release_v1.bat | 2 +- .../ci_build/release/windows/gpu_py37_full/release.bat | 5 ++++- .../release/windows/gpu_py37_full/release_pip_rename.sh | 4 ++-- .../ci_build/release/windows/gpu_py37_full/release_v1.bat | 2 +- .../tools/ci_build/windows/cpu/pip/build_tf_windows.sh | 6 +++++- .../tools/ci_build/windows/gpu/pip/build_tf_windows.sh | 6 +++++- 14 files changed, 34 insertions(+), 17 deletions(-) diff --git a/tensorflow/tools/ci_build/release/windows/cpu_py35_full/release.bat b/tensorflow/tools/ci_build/release/windows/cpu_py35_full/release.bat index f10ba0ecc4fedc..bd8c217ddefe77 100644 --- a/tensorflow/tools/ci_build/release/windows/cpu_py35_full/release.bat +++ b/tensorflow/tools/ci_build/release/windows/cpu_py35_full/release.bat @@ -17,4 +17,4 @@ SET PYTHON_DIRECTORY=Python35 CALL tensorflow\tools\ci_build\release\common_win.bat -call tensorflow\tools\ci_build\windows\cpu\pip\run.bat --release_build --extra_build_flags "--config=v2" --extra_test_flags "--test_env=TF2_BEHAVIOR=1" +call tensorflow\tools\ci_build\windows\cpu\pip\run.bat --release_build --extra_build_flags "--config=v2" --extra_test_flags "--test_env=TF2_BEHAVIOR=1" --project_name "tensorflow_cpu" diff --git a/tensorflow/tools/ci_build/release/windows/cpu_py36_full/release.bat b/tensorflow/tools/ci_build/release/windows/cpu_py36_full/release.bat index 
244e9479eb53e0..0a81a90a43164c 100644 --- a/tensorflow/tools/ci_build/release/windows/cpu_py36_full/release.bat +++ b/tensorflow/tools/ci_build/release/windows/cpu_py36_full/release.bat @@ -17,4 +17,4 @@ SET PYTHON_DIRECTORY=Python36 CALL tensorflow\tools\ci_build\release\common_win.bat -call tensorflow\tools\ci_build\windows\cpu\pip\run.bat --release_build --extra_build_flags "--config=v2" --extra_test_flags "--test_env=TF2_BEHAVIOR=1" +call tensorflow\tools\ci_build\windows\cpu\pip\run.bat --release_build --extra_build_flags "--config=v2" --extra_test_flags "--test_env=TF2_BEHAVIOR=1" --project_name "tensorflow_cpu" diff --git a/tensorflow/tools/ci_build/release/windows/cpu_py37_full/release.bat b/tensorflow/tools/ci_build/release/windows/cpu_py37_full/release.bat index 4164c3cc9b5395..9591d7aac343bd 100644 --- a/tensorflow/tools/ci_build/release/windows/cpu_py37_full/release.bat +++ b/tensorflow/tools/ci_build/release/windows/cpu_py37_full/release.bat @@ -17,4 +17,4 @@ SET PYTHON_DIRECTORY=Python37 CALL tensorflow\tools\ci_build\release\common_win.bat -call tensorflow\tools\ci_build\windows\cpu\pip\run.bat --release_build --extra_build_flags "--config=v2" --extra_test_flags "--test_env=TF2_BEHAVIOR=1" +call tensorflow\tools\ci_build\windows\cpu\pip\run.bat --release_build --extra_build_flags "--config=v2" --extra_test_flags "--test_env=TF2_BEHAVIOR=1" --project_name "tensorflow_cpu" diff --git a/tensorflow/tools/ci_build/release/windows/gpu_py35_full/nightly.bat b/tensorflow/tools/ci_build/release/windows/gpu_py35_full/nightly.bat index 19e8ebcfabd41b..56d5b9637b66a1 100644 --- a/tensorflow/tools/ci_build/release/windows/gpu_py35_full/nightly.bat +++ b/tensorflow/tools/ci_build/release/windows/gpu_py35_full/nightly.bat @@ -17,4 +17,7 @@ SET PYTHON_DIRECTORY=Python35 CALL tensorflow\tools\ci_build\release\common_win.bat -call tensorflow\tools\ci_build\windows\gpu\pip\run.bat --extra_build_flags "--config=v2" --extra_test_flags "--test_env=TF2_BEHAVIOR=1" +call 
tensorflow\tools\ci_build\windows\gpu\pip\run.bat --extra_build_flags "--config=v2" --extra_test_flags "--test_env=TF2_BEHAVIOR=1" --project_name "tensorflow" + +for %%a in ("%~dp0\.") do set "PARENT_DIR=%%~nxa" +bash -l tensorflow\tools\ci_build\release\windows\%PARENT_DIR%\release_pip_rename.sh \ No newline at end of file diff --git a/tensorflow/tools/ci_build/release/windows/gpu_py35_full/release_pip_rename.sh b/tensorflow/tools/ci_build/release/windows/gpu_py35_full/release_pip_rename.sh index 6a868382777791..039f9516d8601d 100644 --- a/tensorflow/tools/ci_build/release/windows/gpu_py35_full/release_pip_rename.sh +++ b/tensorflow/tools/ci_build/release/windows/gpu_py35_full/release_pip_rename.sh @@ -19,6 +19,6 @@ set -x source tensorflow/tools/ci_build/release/common.sh # Copy and rename to tensorflow -for f in $(ls py_test_dir/tensorflow_gpu-*cp3*-cp3*m-win_amd64.whl); do - copy_to_new_project_name "${f}" tensorflow +for f in $(ls py_test_dir/tensorflow-*cp3*-cp3*m-win_amd64.whl); do + copy_to_new_project_name "${f}" tensorflow_gpu done diff --git a/tensorflow/tools/ci_build/release/windows/gpu_py35_full/release_v1.bat b/tensorflow/tools/ci_build/release/windows/gpu_py35_full/release_v1.bat index abd63888e70c71..55e4e4f57827ec 100644 --- a/tensorflow/tools/ci_build/release/windows/gpu_py35_full/release_v1.bat +++ b/tensorflow/tools/ci_build/release/windows/gpu_py35_full/release_v1.bat @@ -17,7 +17,7 @@ SET PYTHON_DIRECTORY=Python35 CALL tensorflow\tools\ci_build\release\common_win.bat -call tensorflow\tools\ci_build\windows\gpu\pip\run.bat --release_build +call tensorflow\tools\ci_build\windows\gpu\pip\run.bat --release_build --project_name "tensorflow" for %%a in ("%~dp0\.") do set "PARENT_DIR=%%~nxa" bash -l tensorflow\tools\ci_build\release\windows\%PARENT_DIR%\release_pip_rename.sh diff --git a/tensorflow/tools/ci_build/release/windows/gpu_py36_full/release.bat b/tensorflow/tools/ci_build/release/windows/gpu_py36_full/release.bat index 
ed0c127afe5512..ede8bd35f52f24 100644 --- a/tensorflow/tools/ci_build/release/windows/gpu_py36_full/release.bat +++ b/tensorflow/tools/ci_build/release/windows/gpu_py36_full/release.bat @@ -17,4 +17,7 @@ SET PYTHON_DIRECTORY=Python36 CALL tensorflow\tools\ci_build\release\common_win.bat -call tensorflow\tools\ci_build\windows\gpu\pip\run.bat --release_build --extra_build_flags "--config=v2" --extra_test_flags "--test_env=TF2_BEHAVIOR=1" +call tensorflow\tools\ci_build\windows\gpu\pip\run.bat --release_build --extra_build_flags "--config=v2" --extra_test_flags "--test_env=TF2_BEHAVIOR=1" --project_name "tensorflow" + +for %%a in ("%~dp0\.") do set "PARENT_DIR=%%~nxa" +bash -l tensorflow\tools\ci_build\release\windows\%PARENT_DIR%\release_pip_rename.sh \ No newline at end of file diff --git a/tensorflow/tools/ci_build/release/windows/gpu_py36_full/release_pip_rename.sh b/tensorflow/tools/ci_build/release/windows/gpu_py36_full/release_pip_rename.sh index 6a868382777791..039f9516d8601d 100644 --- a/tensorflow/tools/ci_build/release/windows/gpu_py36_full/release_pip_rename.sh +++ b/tensorflow/tools/ci_build/release/windows/gpu_py36_full/release_pip_rename.sh @@ -19,6 +19,6 @@ set -x source tensorflow/tools/ci_build/release/common.sh # Copy and rename to tensorflow -for f in $(ls py_test_dir/tensorflow_gpu-*cp3*-cp3*m-win_amd64.whl); do - copy_to_new_project_name "${f}" tensorflow +for f in $(ls py_test_dir/tensorflow-*cp3*-cp3*m-win_amd64.whl); do + copy_to_new_project_name "${f}" tensorflow_gpu done diff --git a/tensorflow/tools/ci_build/release/windows/gpu_py36_full/release_v1.bat b/tensorflow/tools/ci_build/release/windows/gpu_py36_full/release_v1.bat index 7eafdf8af20b0a..a66ca900e47b66 100644 --- a/tensorflow/tools/ci_build/release/windows/gpu_py36_full/release_v1.bat +++ b/tensorflow/tools/ci_build/release/windows/gpu_py36_full/release_v1.bat @@ -17,7 +17,7 @@ SET PYTHON_DIRECTORY=Python36 CALL tensorflow\tools\ci_build\release\common_win.bat -call 
tensorflow\tools\ci_build\windows\gpu\pip\run.bat --release_build +call tensorflow\tools\ci_build\windows\gpu\pip\run.bat --release_build --project_name "tensorflow" for %%a in ("%~dp0\.") do set "PARENT_DIR=%%~nxa" bash -l tensorflow\tools\ci_build\release\windows\%PARENT_DIR%\release_pip_rename.sh diff --git a/tensorflow/tools/ci_build/release/windows/gpu_py37_full/release.bat b/tensorflow/tools/ci_build/release/windows/gpu_py37_full/release.bat index 00d85ef9119b71..7509270fc43796 100644 --- a/tensorflow/tools/ci_build/release/windows/gpu_py37_full/release.bat +++ b/tensorflow/tools/ci_build/release/windows/gpu_py37_full/release.bat @@ -17,4 +17,7 @@ SET PYTHON_DIRECTORY=Python37 CALL tensorflow\tools\ci_build\release\common_win.bat -call tensorflow\tools\ci_build\windows\gpu\pip\run.bat --release_build --extra_build_flags "--config=v2" --extra_test_flags "--test_env=TF2_BEHAVIOR=1" +call tensorflow\tools\ci_build\windows\gpu\pip\run.bat --release_build --extra_build_flags "--config=v2" --extra_test_flags "--test_env=TF2_BEHAVIOR=1" --project_name "tensorflow" + +for %%a in ("%~dp0\.") do set "PARENT_DIR=%%~nxa" +bash -l tensorflow\tools\ci_build\release\windows\%PARENT_DIR%\release_pip_rename.sh \ No newline at end of file diff --git a/tensorflow/tools/ci_build/release/windows/gpu_py37_full/release_pip_rename.sh b/tensorflow/tools/ci_build/release/windows/gpu_py37_full/release_pip_rename.sh index 6a868382777791..039f9516d8601d 100644 --- a/tensorflow/tools/ci_build/release/windows/gpu_py37_full/release_pip_rename.sh +++ b/tensorflow/tools/ci_build/release/windows/gpu_py37_full/release_pip_rename.sh @@ -19,6 +19,6 @@ set -x source tensorflow/tools/ci_build/release/common.sh # Copy and rename to tensorflow -for f in $(ls py_test_dir/tensorflow_gpu-*cp3*-cp3*m-win_amd64.whl); do - copy_to_new_project_name "${f}" tensorflow +for f in $(ls py_test_dir/tensorflow-*cp3*-cp3*m-win_amd64.whl); do + copy_to_new_project_name "${f}" tensorflow_gpu done diff --git 
a/tensorflow/tools/ci_build/release/windows/gpu_py37_full/release_v1.bat b/tensorflow/tools/ci_build/release/windows/gpu_py37_full/release_v1.bat index 261947f58f380b..059e28134c881d 100644 --- a/tensorflow/tools/ci_build/release/windows/gpu_py37_full/release_v1.bat +++ b/tensorflow/tools/ci_build/release/windows/gpu_py37_full/release_v1.bat @@ -17,7 +17,7 @@ SET PYTHON_DIRECTORY=Python37 CALL tensorflow\tools\ci_build\release\common_win.bat -call tensorflow\tools\ci_build\windows\gpu\pip\run.bat --release_build +call tensorflow\tools\ci_build\windows\gpu\pip\run.bat --release_build --project_name "tensorflow" for %%a in ("%~dp0\.") do set "PARENT_DIR=%%~nxa" bash -l tensorflow\tools\ci_build\release\windows\%PARENT_DIR%\release_pip_rename.sh diff --git a/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh b/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh index 5f949f2bc7e9e6..89cc383d6f11a4 100644 --- a/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh +++ b/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh @@ -120,6 +120,10 @@ if [[ "$TF_NIGHTLY" == 1 ]]; then else EXTRA_PIP_FLAGS="--project_name ${PROJECT_NAME} --nightly_flag" fi +else + if [[ -v ${PROJECT_NAME} ]]; then + EXTRA_PIP_FLAGS="--project_name ${PROJECT_NAME}" + fi fi # Enable short object file path to avoid long path issue on Windows. 
@@ -154,7 +158,7 @@ if [[ "$TF_NIGHTLY" == 1 ]]; then fi # Running python tests on Windows needs pip package installed -PIP_NAME=$(ls ${PY_TEST_DIR}/tensorflow-*.whl) +PIP_NAME=$(ls ${PY_TEST_DIR}/tensorflow*.whl) reinstall_tensorflow_pip ${PIP_NAME} # NUMBER_OF_PROCESSORS is predefined on Windows diff --git a/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh b/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh index fd06af5b80541d..17a45449a2e282 100644 --- a/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh +++ b/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh @@ -120,6 +120,10 @@ if [[ "$TF_NIGHTLY" == 1 ]]; then else EXTRA_PIP_FLAGS="--project_name ${PROJECT_NAME} --nightly_flag" fi +else + if [[ -v ${PROJECT_NAME} ]]; then + EXTRA_PIP_FLAGS="--project_name ${PROJECT_NAME}" + fi fi # Enable short object file path to avoid long path issue on Windows. @@ -154,7 +158,7 @@ if [[ "$TF_NIGHTLY" == 1 ]]; then fi # Running python tests on Windows needs pip package installed -PIP_NAME=$(ls ${PY_TEST_DIR}/tensorflow_gpu-*.whl) +PIP_NAME=$(ls ${PY_TEST_DIR}/tensorflow*.whl) reinstall_tensorflow_pip ${PIP_NAME} TF_GPU_COUNT=${TF_GPU_COUNT:-4} From ae30c1ba8712ddd8dd1bcea03766ef6e7000c5b8 Mon Sep 17 00:00:00 2001 From: Yifei Feng Date: Tue, 26 Nov 2019 10:57:26 -0800 Subject: [PATCH 052/130] Fix bug in if -v check. 
PiperOrigin-RevId: 282598769 Change-Id: Ic71c23ba3983c397cac81a44cd4a71f76e5ffef3 --- tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh | 2 +- tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh b/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh index 89cc383d6f11a4..223c10fb0256dc 100644 --- a/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh +++ b/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh @@ -121,7 +121,7 @@ if [[ "$TF_NIGHTLY" == 1 ]]; then EXTRA_PIP_FLAGS="--project_name ${PROJECT_NAME} --nightly_flag" fi else - if [[ -v ${PROJECT_NAME} ]]; then + if [[ -v PROJECT_NAME ]]; then EXTRA_PIP_FLAGS="--project_name ${PROJECT_NAME}" fi fi diff --git a/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh b/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh index 17a45449a2e282..7cf23775b649d3 100644 --- a/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh +++ b/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh @@ -121,7 +121,7 @@ if [[ "$TF_NIGHTLY" == 1 ]]; then EXTRA_PIP_FLAGS="--project_name ${PROJECT_NAME} --nightly_flag" fi else - if [[ -v ${PROJECT_NAME} ]]; then + if [[ -v PROJECT_NAME ]]; then EXTRA_PIP_FLAGS="--project_name ${PROJECT_NAME}" fi fi From fff3b08dd10ad510408481cce53bc2e5005758b0 Mon Sep 17 00:00:00 2001 From: Yifei Feng Date: Tue, 26 Nov 2019 15:02:17 -0800 Subject: [PATCH 053/130] Move gpu script update to the correct file. 
PiperOrigin-RevId: 282646719 Change-Id: Idbec43ef97473209cf301ef66ec3f323952e42ad --- .../tools/ci_build/release/windows/gpu_py35_full/nightly.bat | 5 +---- .../tools/ci_build/release/windows/gpu_py35_full/release.bat | 5 ++++- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tensorflow/tools/ci_build/release/windows/gpu_py35_full/nightly.bat b/tensorflow/tools/ci_build/release/windows/gpu_py35_full/nightly.bat index 56d5b9637b66a1..19e8ebcfabd41b 100644 --- a/tensorflow/tools/ci_build/release/windows/gpu_py35_full/nightly.bat +++ b/tensorflow/tools/ci_build/release/windows/gpu_py35_full/nightly.bat @@ -17,7 +17,4 @@ SET PYTHON_DIRECTORY=Python35 CALL tensorflow\tools\ci_build\release\common_win.bat -call tensorflow\tools\ci_build\windows\gpu\pip\run.bat --extra_build_flags "--config=v2" --extra_test_flags "--test_env=TF2_BEHAVIOR=1" --project_name "tensorflow" - -for %%a in ("%~dp0\.") do set "PARENT_DIR=%%~nxa" -bash -l tensorflow\tools\ci_build\release\windows\%PARENT_DIR%\release_pip_rename.sh \ No newline at end of file +call tensorflow\tools\ci_build\windows\gpu\pip\run.bat --extra_build_flags "--config=v2" --extra_test_flags "--test_env=TF2_BEHAVIOR=1" diff --git a/tensorflow/tools/ci_build/release/windows/gpu_py35_full/release.bat b/tensorflow/tools/ci_build/release/windows/gpu_py35_full/release.bat index 71db61889bcff2..cba62225bee4fe 100644 --- a/tensorflow/tools/ci_build/release/windows/gpu_py35_full/release.bat +++ b/tensorflow/tools/ci_build/release/windows/gpu_py35_full/release.bat @@ -17,4 +17,7 @@ SET PYTHON_DIRECTORY=Python35 CALL tensorflow\tools\ci_build\release\common_win.bat -call tensorflow\tools\ci_build\windows\gpu\pip\run.bat --release_build --extra_build_flags "--config=v2" --extra_test_flags "--test_env=TF2_BEHAVIOR=1" +call tensorflow\tools\ci_build\windows\gpu\pip\run.bat --release_build --extra_build_flags "--config=v2" --extra_test_flags "--test_env=TF2_BEHAVIOR=1" --project_name "tensorflow" + +for %%a in ("%~dp0\.") do 
set "PARENT_DIR=%%~nxa" +bash -l tensorflow\tools\ci_build\release\windows\%PARENT_DIR%\release_pip_rename.sh From b648e4528d2186bcedb033a99b06c14b0a69dfa3 Mon Sep 17 00:00:00 2001 From: Mahmoud Abuzaina Date: Tue, 26 Nov 2019 18:13:20 -0800 Subject: [PATCH 054/130] Fixing a bug in Elu --- tensorflow/core/kernels/mkl_conv_ops.cc | 20 ++++++++-------- tensorflow/core/kernels/mkl_fused_ops_test.cc | 23 +++++++++++-------- 2 files changed, 22 insertions(+), 21 deletions(-) diff --git a/tensorflow/core/kernels/mkl_conv_ops.cc b/tensorflow/core/kernels/mkl_conv_ops.cc index f5a037f8f2988f..d23f84fc059b61 100644 --- a/tensorflow/core/kernels/mkl_conv_ops.cc +++ b/tensorflow/core/kernels/mkl_conv_ops.cc @@ -24,8 +24,8 @@ limitations under the License. #include #include -#include "mkldnn.hpp" #include "absl/strings/str_join.h" +#include "mkldnn.hpp" #include "tensorflow/core/framework/bounds_check.h" #include "tensorflow/core/framework/numeric_op.h" #include "tensorflow/core/framework/op_kernel.h" @@ -570,17 +570,15 @@ class MklConvOp : public OpKernel { OP_REQUIRES(context, dilations_.size() == 5, errors::InvalidArgument("Dilation rates field must " "specify 5 dimensions")); - OP_REQUIRES(context, - (GetTensorDim(dilations_, data_format_, 'N') == 1 && - GetTensorDim(dilations_, data_format_, 'C') == 1), + OP_REQUIRES(context, (GetTensorDim(dilations_, data_format_, 'N') == 1 && + GetTensorDim(dilations_, data_format_, 'C') == 1), errors::InvalidArgument( "Current implementation does not yet support " "dilations rates in the batch and depth dimensions.")); OP_REQUIRES( - context, - (GetTensorDim(dilations_, data_format_, '0') > 0 && - GetTensorDim(dilations_, data_format_, '1') > 0 && - GetTensorDim(dilations_, data_format_, '2') > 0), + context, (GetTensorDim(dilations_, data_format_, '0') > 0 && + GetTensorDim(dilations_, data_format_, '1') > 0 && + GetTensorDim(dilations_, data_format_, '2') > 0), errors::InvalidArgument("Dilated rates should be larger than 0.")); } } 
@@ -1350,7 +1348,7 @@ class MklFusedConvOp } else if (fused_ops == std::vector{"Relu6"}) { this->set_fuse_activation(true, ALGORITHM::eltwise_bounded_relu, 6.0); } else if (fused_ops == std::vector{"Elu"}) { - this->set_fuse_activation(true, ALGORITHM::eltwise_elu); + this->set_fuse_activation(true, ALGORITHM::eltwise_elu, 1.0); } else if (fused_ops == std::vector{"BiasAdd", "Relu"}) { this->set_fuse_biasadd(true); this->set_fuse_activation(true, ALGORITHM::eltwise_relu); @@ -1365,7 +1363,7 @@ class MklFusedConvOp "Fused Conv2D must have one extra argument: bias.")); } else if (fused_ops == std::vector{"BiasAdd", "Elu"}) { this->set_fuse_biasadd(true); - this->set_fuse_activation(true, ALGORITHM::eltwise_elu); + this->set_fuse_activation(true, ALGORITHM::eltwise_elu, 1.0); OP_REQUIRES(context, num_args == 1, errors::InvalidArgument( "Fused Conv2D must have one extra argument: bias.")); @@ -1395,7 +1393,7 @@ class MklFusedConvOp } else if (fused_ops == std::vector{"BiasAdd", "Add", "Elu"}) { this->set_fuse_biasadd(true); this->set_fuse_add(true); - this->set_fuse_activation(true, ALGORITHM::eltwise_elu); + this->set_fuse_activation(true, ALGORITHM::eltwise_elu, 1.0); OP_REQUIRES( context, num_args == 2, errors::InvalidArgument( diff --git a/tensorflow/core/kernels/mkl_fused_ops_test.cc b/tensorflow/core/kernels/mkl_fused_ops_test.cc index 863e4ea0d3ab99..a92a4f38f54dae 100644 --- a/tensorflow/core/kernels/mkl_fused_ops_test.cc +++ b/tensorflow/core/kernels/mkl_fused_ops_test.cc @@ -110,14 +110,14 @@ class CommonTestUtilities : public OpsTestBase { DataType dtype = DataTypeToEnum::v(); Tensor image(dtype, {image_batch_count, image_height, image_width, depth}); - image.flat() = image.flat().setRandom(); + image.flat() = image.flat().template setRandom(); Tensor filter(dtype, {filter_size, filter_size, depth, filter_count}); - filter.flat() = filter.flat().setRandom(); + filter.flat() = filter.flat().template setRandom(); const int bias_size = filter_count; Tensor 
bias(dtype, {bias_size}); - bias.flat() = bias.flat().setRandom(); + bias.flat() = bias.flat().template setRandom(); Tensor conv_2d; Tensor fused_conv_2d; @@ -140,14 +140,14 @@ class CommonTestUtilities : public OpsTestBase { DataType dtype = DataTypeToEnum::v(); Tensor image(dtype, {image_batch_count, image_height, image_width, depth}); - image.flat() = image.flat().setRandom(); + image.flat() = image.flat().template setRandom(); Tensor filter(dtype, {filter_size, filter_size, depth, filter_count}); - filter.flat() = filter.flat().setRandom(); + filter.flat() = filter.flat().template setRandom(); const int bias_size = filter_count; Tensor bias(dtype, {bias_size}); - bias.flat() = bias.flat().setRandom(); + bias.flat() = bias.flat().template setRandom(); Tensor conv_2d; Tensor fused_conv_2d; @@ -168,13 +168,13 @@ class CommonTestUtilities : public OpsTestBase { DataType dtype = DataTypeToEnum::v(); Tensor input(dtype, {batch, depth}); - input.flat() = input.flat().setRandom(); + input.flat() = input.flat().template setRandom(); Tensor weight(dtype, {depth, weight_count}); - weight.flat() = weight.flat().setRandom(); + weight.flat() = weight.flat().template setRandom(); Tensor bias(dtype, {weight_count}); - bias.flat() = bias.flat().setRandom(); + bias.flat() = bias.flat().template setRandom(); Tensor output; Tensor fused_output; @@ -187,6 +187,9 @@ class CommonTestUtilities : public OpsTestBase { test::ExpectClose(output, fused_output, 1e-5); } + + private: + using random_gen_ = Eigen::internal::NormalRandomGenerator; }; // Testing MKL's fused convolution ops @@ -242,7 +245,7 @@ class MklFusedConv2DOpTest : public OpsTestBase { if (std::find(fused_ops.begin(), fused_ops.end(), "Elu") != fused_ops.end()) { last_op = "with_elu"; - next_op = ops::Relu(root.WithOpName(last_op), next_op); + next_op = ops::Elu(root.WithOpName(last_op), next_op); } CommonTestUtilities::RunAndFetch(root, last_op, output); From 4f165abf40707c5569ee4cbd23852fe171eee7e4 Mon Sep 17 00:00:00 
2001 From: Haoliang Zhang Date: Wed, 27 Nov 2019 13:34:01 -0800 Subject: [PATCH 055/130] Update op version map for tf 2.1 RC0. PiperOrigin-RevId: 282826021 Change-Id: If60097ccff777dae027600364561c3865af176fd --- tensorflow/lite/toco/tflite/op_version.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/lite/toco/tflite/op_version.cc b/tensorflow/lite/toco/tflite/op_version.cc index 39258339e0e27c..a7a829e77e368c 100644 --- a/tensorflow/lite/toco/tflite/op_version.cc +++ b/tensorflow/lite/toco/tflite/op_version.cc @@ -74,7 +74,7 @@ string GetMinimumRuntimeVersionForModel(const Model& model) { {{OperatorType::kCast, 1}, "1.5.0"}, {{OperatorType::kConcatenation, 1}, "1.5.0"}, {{OperatorType::kConcatenation, 2}, "1.14.0"}, - {{OperatorType::kDepthToSpace, 1}, kPendingReleaseOpVersion}, + {{OperatorType::kDepthToSpace, 1}, "2.1.0"}, {{OperatorType::kFakeQuant, 1}, "1.5.0"}, {{OperatorType::kFakeQuant, 2}, "1.10.0"}, {{OperatorType::kFullyConnected, 1}, "1.5.0"}, @@ -82,7 +82,7 @@ string GetMinimumRuntimeVersionForModel(const Model& model) { {{OperatorType::kFullyConnected, 3}, "1.14.0"}, {{OperatorType::kFullyConnected, 4}, "1.14.0"}, {{OperatorType::kFullyConnected, 5}, "2.0.0"}, - {{OperatorType::kFullyConnected, 6}, kPendingReleaseOpVersion}, + {{OperatorType::kFullyConnected, 6}, "2.1.0"}, {{OperatorType::kGather, 1}, "1.6.0"}, {{OperatorType::kGather, 2}, "1.14.0"}, {{OperatorType::kGather, 3}, "1.15.0"}, @@ -145,7 +145,7 @@ string GetMinimumRuntimeVersionForModel(const Model& model) { {{OperatorType::kSplitV, 1}, "1.13.1"}, {{OperatorType::kStridedSlice, 1}, "1.6.0"}, {{OperatorType::kStridedSlice, 2}, "1.14.0"}, - {{OperatorType::kStridedSlice, 3}, kPendingReleaseOpVersion}, + {{OperatorType::kStridedSlice, 3}, "2.1.0"}, {{OperatorType::kTopK_V2, 1}, "1.7.0"}, {{OperatorType::kTopK_V2, 2}, "1.14.0"}, {{OperatorType::kArgMax, 1}, "1.9.0"}, @@ -205,7 +205,7 @@ string GetMinimumRuntimeVersionForModel(const Model& model) { 
{{OperatorType::kElu, 1}, "1.14.0"}, {{OperatorType::kRound, 1}, "1.14.0"}, {{OperatorType::kRelu, 1}, "1.5.0"}, - {{OperatorType::kRelu, 2}, kPendingReleaseOpVersion}, + {{OperatorType::kRelu, 2}, "2.1.0"}, {{OperatorType::kRelu1, 1}, "1.5.0"}, {{OperatorType::kPRelu, 1}, "1.8.0"}, {{OperatorType::kExp, 1}, "1.7.0"}, From 971a1881934f6878edaa5af753f29a0b300f04bb Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 27 Nov 2019 14:17:37 -0800 Subject: [PATCH 056/130] Set --incompatible_remove_legacy_whole_archive to False A roll-forward of cl/281126040 The windows build failure that caused the rollback is addressed in cl/282539273 PiperOrigin-RevId: 282833339 Change-Id: I36a4ea4b188880265a80cc52f229e26004b56b17 --- .bazelrc | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/.bazelrc b/.bazelrc index 638bb39fafd66e..9aca8b4261e870 100644 --- a/.bazelrc +++ b/.bazelrc @@ -137,8 +137,19 @@ build --announce_rc # Other build flags. build --define=grpc_no_ares=true -# Prevent regression of https://github.com/bazelbuild/bazel/issues/7362 -build --incompatible_remove_legacy_whole_archive +# See https://github.com/bazelbuild/bazel/issues/7362 for information on what +# --incompatible_remove_legacy_whole_archive flag does. +# This flag is set to true in Bazel 1.0 and newer versions. We tried to migrate +# Tensorflow to the default, however test coverage wasn't enough to catch the +# errors. +# There is ongoing work on Bazel team's side to provide support for transitive +# shared libraries. As part of migrating to transitive shared libraries, we +# hope to provide a better mechanism for control over symbol exporting, and +# then tackle this issue again. +# +# TODO: Remove this line once TF doesn't depend on Bazel wrapping all library +# archives in -whole_archive -no_whole_archive. 
+build --noincompatible_remove_legacy_whole_archive # Modular TF build options build:dynamic_kernels --define=dynamic_loaded_kernels=true From ddef31b4e0b00c4f55cd910d1781221ba2829fe0 Mon Sep 17 00:00:00 2001 From: Lukas Geiger Date: Wed, 27 Nov 2019 02:01:27 +0000 Subject: [PATCH 057/130] Fix TensorFlow pip API generation --- tensorflow/api_template.__init__.py | 10 ++++++---- tensorflow/api_template_v1.__init__.py | 9 +++++---- .../python/tools/api/generator/create_python_api.py | 12 +++++++++--- tensorflow/virtual_root_template_v1.__init__.py | 3 --- tensorflow/virtual_root_template_v2.__init__.py | 10 ---------- 5 files changed, 20 insertions(+), 24 deletions(-) diff --git a/tensorflow/api_template.__init__.py b/tensorflow/api_template.__init__.py index 56d65d45faf0b1..c515cc76b9aacd 100644 --- a/tensorflow/api_template.__init__.py +++ b/tensorflow/api_template.__init__.py @@ -119,11 +119,11 @@ def _running_from_pip_package(): _current_file_location.startswith(dir_) for dir_ in _site_packages_dirs) if _running_from_pip_package(): - for s in _site_packages_dirs: + for _s in _site_packages_dirs: # TODO(gunan): Add sanity checks to loaded modules here. 
- plugin_dir = _os.path.join(s, 'tensorflow-plugins') - if _fi.file_exists(plugin_dir): - _ll.load_library(plugin_dir) + _plugin_dir = _os.path.join(_s, 'tensorflow-plugins') + if _fi.file_exists(_plugin_dir): + _ll.load_library(_plugin_dir) # Add module aliases if hasattr(_current_module, 'keras'): @@ -136,3 +136,5 @@ def _running_from_pip_package(): setattr(_current_module, "optimizers", optimizers) setattr(_current_module, "initializers", initializers) # pylint: enable=undefined-variable + +# __all__ PLACEHOLDER diff --git a/tensorflow/api_template_v1.__init__.py b/tensorflow/api_template_v1.__init__.py index 97478a18b8a20a..2b2899c3fe031e 100644 --- a/tensorflow/api_template_v1.__init__.py +++ b/tensorflow/api_template_v1.__init__.py @@ -132,9 +132,10 @@ def _running_from_pip_package(): _current_file_location.startswith(dir_) for dir_ in _site_packages_dirs) if _running_from_pip_package(): - for s in _site_packages_dirs: + for _s in _site_packages_dirs: # TODO(gunan): Add sanity checks to loaded modules here. - plugin_dir = _os.path.join(s, 'tensorflow-plugins') - if _fi.file_exists(plugin_dir): - _ll.load_library(plugin_dir) + _plugin_dir = _os.path.join(_s, 'tensorflow-plugins') + if _fi.file_exists(_plugin_dir): + _ll.load_library(_plugin_dir) +# __all__ PLACEHOLDER diff --git a/tensorflow/python/tools/api/generator/create_python_api.py b/tensorflow/python/tools/api/generator/create_python_api.py index 3af677322d67ee..80f663683c3ee0 100644 --- a/tensorflow/python/tools/api/generator/create_python_api.py +++ b/tensorflow/python/tools/api/generator/create_python_api.py @@ -243,11 +243,12 @@ def build(self): # from it using * import. Don't need this for lazy_loading because the # underscore symbols are already included in __all__ when passed in and # handled by TFModuleWrapper. 
+ root_module_footer = '' if not self._lazy_loading: underscore_names_str = ', '.join( '\'%s\'' % name for name in self._underscore_names_in_root) - module_text_map[''] = module_text_map.get('', '') + ''' + root_module_footer = ''' _names_with_underscore = [%s] __all__ = [_s for _s in dir() if not _s.startswith('_')] __all__.extend([_s for _s in _names_with_underscore]) @@ -273,7 +274,7 @@ def build(self): footer_text_map[dest_module] = _DEPRECATION_FOOTER % ( dest_module, public_apis_name, deprecation, has_lite) - return module_text_map, footer_text_map + return module_text_map, footer_text_map, root_module_footer def format_import(self, source_module_name, source_name, dest_name): """Formats import statement. @@ -620,7 +621,11 @@ def create_api_files(output_files, packages, root_init_template, output_dir, os.makedirs(os.path.dirname(file_path)) open(file_path, 'a').close() - module_text_map, deprecation_footer_map = get_api_init_text( + ( + module_text_map, + deprecation_footer_map, + root_module_footer, + ) = get_api_init_text( packages, output_package, api_name, api_version, compat_api_versions, lazy_loading, use_relative_imports) @@ -652,6 +657,7 @@ def create_api_files(output_files, packages, root_init_template, output_dir, with open(root_init_template, 'r') as root_init_template_file: contents = root_init_template_file.read() contents = contents.replace('# API IMPORTS PLACEHOLDER', text) + contents = contents.replace('# __all__ PLACEHOLDER', root_module_footer) elif module in compat_module_to_template: # Read base init file for compat module with open(compat_module_to_template[module], 'r') as init_template_file: diff --git a/tensorflow/virtual_root_template_v1.__init__.py b/tensorflow/virtual_root_template_v1.__init__.py index 236e9f52258973..9a45bc0355d0b7 100644 --- a/tensorflow/virtual_root_template_v1.__init__.py +++ b/tensorflow/virtual_root_template_v1.__init__.py @@ -132,7 +132,4 @@ def _forward_module(old_name): except NameError: pass -# Manually 
patch keras and estimator so tf.keras and tf.estimator work -keras = _sys.modules["tensorflow.keras"] -if not _root_estimator: estimator = _sys.modules["tensorflow.estimator"] # LINT.ThenChange(//tensorflow/virtual_root_template_v2.__init__.py.oss) diff --git a/tensorflow/virtual_root_template_v2.__init__.py b/tensorflow/virtual_root_template_v2.__init__.py index 83c020182a8ee9..bd8c903e455db5 100644 --- a/tensorflow/virtual_root_template_v2.__init__.py +++ b/tensorflow/virtual_root_template_v2.__init__.py @@ -126,14 +126,4 @@ def _forward_module(old_name): except NameError: pass -# TODO(mihaimaruseac): Revisit all of this once we release 2.1 -# Manually patch keras and estimator so tf.keras and tf.estimator work -keras = _sys.modules["tensorflow.keras"] -if not _root_estimator: estimator = _sys.modules["tensorflow.estimator"] -# Also import module aliases -try: - from tensorflow_core import losses, metrics, initializers, optimizers -except ImportError: - pass - # LINT.ThenChange(//tensorflow/virtual_root_template_v1.__init__.py.oss) From 33340d137af692c0b6d3c863e376acf1a165d68d Mon Sep 17 00:00:00 2001 From: Lukas Geiger Date: Wed, 27 Nov 2019 18:35:45 +0000 Subject: [PATCH 058/130] Fix create_python_api_test.py --- .../tools/api/generator/create_python_api_test.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tensorflow/python/tools/api/generator/create_python_api_test.py b/tensorflow/python/tools/api/generator/create_python_api_test.py index 010f189dcb27f0..76404d6c82b33b 100644 --- a/tensorflow/python/tools/api/generator/create_python_api_test.py +++ b/tensorflow/python/tools/api/generator/create_python_api_test.py @@ -62,7 +62,7 @@ def tearDown(self): del sys.modules[_MODULE_NAME] def testFunctionImportIsAdded(self): - imports, _ = create_python_api.get_api_init_text( + imports, _, _ = create_python_api.get_api_init_text( packages=[create_python_api._DEFAULT_PACKAGE], output_package='tensorflow', api_name='tensorflow', @@ -97,7 
+97,7 @@ def testFunctionImportIsAdded(self): msg='compat.v1 in %s' % str(imports.keys())) def testClassImportIsAdded(self): - imports, _ = create_python_api.get_api_init_text( + imports, _, _ = create_python_api.get_api_init_text( packages=[create_python_api._DEFAULT_PACKAGE], output_package='tensorflow', api_name='tensorflow', @@ -116,7 +116,7 @@ def testClassImportIsAdded(self): msg='%s not in %s' % (expected_import, str(imports))) def testConstantIsAdded(self): - imports, _ = create_python_api.get_api_init_text( + imports, _, _ = create_python_api.get_api_init_text( packages=[create_python_api._DEFAULT_PACKAGE], output_package='tensorflow', api_name='tensorflow', @@ -132,7 +132,7 @@ def testConstantIsAdded(self): msg='%s not in %s' % (expected, str(imports))) def testCompatModuleIsAdded(self): - imports, _ = create_python_api.get_api_init_text( + imports, _, _ = create_python_api.get_api_init_text( packages=[create_python_api._DEFAULT_PACKAGE], output_package='tensorflow', api_name='tensorflow', @@ -144,7 +144,7 @@ def testCompatModuleIsAdded(self): msg='compat.v1.test not in %s' % str(imports.keys())) def testNestedCompatModulesAreAdded(self): - imports, _ = create_python_api.get_api_init_text( + imports, _, _ = create_python_api.get_api_init_text( packages=[create_python_api._DEFAULT_PACKAGE], output_package='tensorflow', api_name='tensorflow', From dbf773df028bf60a07268eebae7bd3cbc48b5e51 Mon Sep 17 00:00:00 2001 From: Brian Zhao Date: Tue, 19 Nov 2019 11:39:01 -0800 Subject: [PATCH 059/130] Fix for https://github.com/tensorflow/tensorflow/issues/34117 Bazel's change to legacy_whole_archive behavior is not the cause for TF's linking issues with protobuf. 
Protobuf's implementation and runtime are correctly being linked into TF here: https://github.com/tensorflow/tensorflow/blob/da5765ebad2e1d3c25d11ee45aceef0b60da499f/tensorflow/core/platform/default/build_config.bzl#L239 and https://github.com/tensorflow/tensorflow/blob/da5765ebad2e1d3c25d11ee45aceef0b60da499f/third_party/protobuf/protobuf.patch#L18, and I've confirmed that protobuf symbols are still present in libtensorflow_framework.so via nm. After examining the linker flags that bazel passes to gcc, https://gist.github.com/bmzhao/f51bbdef50e9db9b24acd5b5acc95080, I discovered that the order of the linker flags was what was causing the undefined reference. See https://eli.thegreenplace.net/2013/07/09/library-order-in-static-linking/ and https://stackoverflow.com/a/12272890. Basically linkers discard the objects they've been asked to link if those objects do not export any symbols that the linker currently has kept track of as "undefined". To prove this was the issue, I was able to successfully link after moving the linking shared object flag (-l:libtensorflow_framework.so.2) to the bottom of the flag order, and manually invoking g++. This change uses cc_import to link against a .so in the "deps" of tf_cc_binary, rather than as the "srcs" of tf_cc_binary. This technique was inspired by the comment here: https://github.com/bazelbuild/bazel/blob/387c610d09b99536f7f5b8ecb883d14ee6063fdd/examples/windows/dll/windows_dll_library.bzl#L47-L48 Successfully built on vanilla Ubuntu 18.04 VM: bmzhao@bmzhao-tf-build-failure-reproing:~/tf-fix/tf$ bazel build -c opt --config=cuda --config=v2 --host_force_python=PY3 //tensorflow/tools/pip_package:build_pip_package Target //tensorflow/tools/pip_package:build_pip_package up-to-date: bazel-bin/tensorflow/tools/pip_package/build_pip_package INFO: Elapsed time: 2067.380s, Critical Path: 828.19s INFO: 12942 processes: 51 remote cache hit, 12891 local. 
INFO: Build completed successfully, 14877 total actions The root cause might instead be https://github.com/bazelbuild/bazel/issues/7687, which is pending further investigation. PiperOrigin-RevId: 281341817 Change-Id: Ia240eb050d9514ed5ac95b7b5fb7e0e98b7d1e83 --- tensorflow/BUILD | 12 ++++++++++++ tensorflow/tensorflow.bzl | 5 +++++ 2 files changed, 17 insertions(+) diff --git a/tensorflow/BUILD b/tensorflow/BUILD index 43585f0ed3e4e1..2ccb9854622282 100644 --- a/tensorflow/BUILD +++ b/tensorflow/BUILD @@ -607,6 +607,18 @@ tf_cc_shared_object( ] + tf_additional_binary_deps(), ) +# This is intended to be the same as tf_binary_additional_srcs: +# https://github.com/tensorflow/tensorflow/blob/cd67f4f3723f9165aabedd0171aaadc6290636e5/tensorflow/tensorflow.bzl#L396-L425 +# And is usable in the "deps" attribute instead of the "srcs" attribute +# as a workaround for https://github.com/tensorflow/tensorflow/issues/34117 +cc_import( + name = "libtensorflow_framework_import_lib", + shared_library = select({ + "//tensorflow:macos": ":libtensorflow_framework.dylib", + "//conditions:default": ":libtensorflow_framework.so", + }), +) + # ------------------------------------------- # New rules should be added above this target. 
# ------------------------------------------- diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index 9ac585256148f1..dfa7192ba84a32 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -626,6 +626,11 @@ def tf_cc_binary( [ clean_dep("//third_party/mkl:intel_binary_blob"), ], + ) + if_static( + extra_deps = [], + otherwise = [ + clean_dep("//tensorflow:libtensorflow_framework_import_lib"), + ], ), data = depset(data + added_data_deps), linkopts = linkopts + _rpath_linkopts(name_os), From 9358b96292df2621fcc7659bc4ac23c97d9d69c9 Mon Sep 17 00:00:00 2001 From: Brian Atkinson Date: Mon, 25 Nov 2019 13:50:34 -0800 Subject: [PATCH 060/130] Add a redirection point to core/platform for build_config.bzl This is just the redirection point. Changing to make use of it is coming in another CL. PiperOrigin-RevId: 282425960 Change-Id: I5b12fe759e58e408246d31a8de83406dc93e1852 --- tensorflow/core/platform/build_config.bzl | 70 +++++++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 tensorflow/core/platform/build_config.bzl diff --git a/tensorflow/core/platform/build_config.bzl b/tensorflow/core/platform/build_config.bzl new file mode 100644 index 00000000000000..03a67e9b789d1b --- /dev/null +++ b/tensorflow/core/platform/build_config.bzl @@ -0,0 +1,70 @@ +"""Provides a redirection point for platform specific implementations of starlark utilities.""" + +load( + "//tensorflow/core/platform:default/build_config.bzl", + _pyx_library = "pyx_library", + _tf_additional_all_protos = "tf_additional_all_protos", + _tf_additional_binary_deps = "tf_additional_binary_deps", + _tf_additional_core_deps = "tf_additional_core_deps", + _tf_additional_cupti_test_flags = "tf_additional_cupti_test_flags", + _tf_additional_cupti_utils_cuda_deps = "tf_additional_cupti_utils_cuda_deps", + _tf_additional_device_tracer_srcs = "tf_additional_device_tracer_srcs", + _tf_additional_lib_deps = "tf_additional_lib_deps", + _tf_additional_lib_hdrs = 
"tf_additional_lib_hdrs", + _tf_additional_lib_srcs = "tf_additional_lib_srcs", + _tf_additional_monitoring_hdrs = "tf_additional_monitoring_hdrs", + _tf_additional_monitoring_srcs = "tf_additional_monitoring_srcs", + _tf_additional_proto_hdrs = "tf_additional_proto_hdrs", + _tf_additional_rpc_deps = "tf_additional_rpc_deps", + _tf_additional_tensor_coding_deps = "tf_additional_tensor_coding_deps", + _tf_additional_test_deps = "tf_additional_test_deps", + _tf_additional_test_srcs = "tf_additional_test_srcs", + _tf_fingerprint_deps = "tf_fingerprint_deps", + _tf_jspb_proto_library = "tf_jspb_proto_library", + _tf_kernel_tests_linkstatic = "tf_kernel_tests_linkstatic", + _tf_lib_proto_parsing_deps = "tf_lib_proto_parsing_deps", + _tf_proto_library = "tf_proto_library", + _tf_proto_library_cc = "tf_proto_library_cc", + _tf_proto_library_py = "tf_proto_library_py", + _tf_protobuf_compiler_deps = "tf_protobuf_compiler_deps", + _tf_protobuf_deps = "tf_protobuf_deps", + _tf_protos_all = "tf_protos_all", + _tf_protos_all_impl = "tf_protos_all_impl", + _tf_protos_grappler = "tf_protos_grappler", + _tf_protos_grappler_impl = "tf_protos_grappler_impl", + _tf_py_clif_cc = "tf_py_clif_cc", + _tf_pyclif_proto_library = "tf_pyclif_proto_library", +) + +pyx_library = _pyx_library +tf_additional_all_protos = _tf_additional_all_protos +tf_additional_binary_deps = _tf_additional_binary_deps +tf_additional_core_deps = _tf_additional_core_deps +tf_additional_cupti_test_flags = _tf_additional_cupti_test_flags +tf_additional_cupti_utils_cuda_deps = _tf_additional_cupti_utils_cuda_deps +tf_additional_device_tracer_srcs = _tf_additional_device_tracer_srcs +tf_additional_lib_deps = _tf_additional_lib_deps +tf_additional_lib_hdrs = _tf_additional_lib_hdrs +tf_additional_lib_srcs = _tf_additional_lib_srcs +tf_additional_monitoring_hdrs = _tf_additional_monitoring_hdrs +tf_additional_monitoring_srcs = _tf_additional_monitoring_srcs +tf_additional_proto_hdrs = _tf_additional_proto_hdrs 
+tf_additional_rpc_deps = _tf_additional_rpc_deps +tf_additional_tensor_coding_deps = _tf_additional_tensor_coding_deps +tf_additional_test_deps = _tf_additional_test_deps +tf_additional_test_srcs = _tf_additional_test_srcs +tf_fingerprint_deps = _tf_fingerprint_deps +tf_jspb_proto_library = _tf_jspb_proto_library +tf_kernel_tests_linkstatic = _tf_kernel_tests_linkstatic +tf_lib_proto_parsing_deps = _tf_lib_proto_parsing_deps +tf_proto_library = _tf_proto_library +tf_proto_library_cc = _tf_proto_library_cc +tf_proto_library_py = _tf_proto_library_py +tf_protobuf_compiler_deps = _tf_protobuf_compiler_deps +tf_protobuf_deps = _tf_protobuf_deps +tf_protos_all = _tf_protos_all +tf_protos_all_impl = _tf_protos_all_impl +tf_protos_grappler = _tf_protos_grappler +tf_protos_grappler_impl = _tf_protos_grappler_impl +tf_py_clif_cc = _tf_py_clif_cc +tf_pyclif_proto_library = _tf_pyclif_proto_library From b8cf717bac0bb4638685205fd6a0e5ed7e480817 Mon Sep 17 00:00:00 2001 From: Duncan Riach Date: Thu, 5 Dec 2019 17:58:05 -0800 Subject: [PATCH 061/130] Add info about TF_DETERMINISTIC_OPS to version 2.1 release notes --- RELEASE.md | 1 + 1 file changed, 1 insertion(+) diff --git a/RELEASE.md b/RELEASE.md index 7b4f0f29f1a609..3996280298d3db 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -19,6 +19,7 @@ TensorFlow 2.1 will be the last TF release supporting Python 2. Python 2 support * Changes rebatching for `tf.data datasets` + distribution strategies for better performance. Note that the dataset also behaves slightly differently, in that the rebatched dataset cardinality will always be a multiple of the number of replicas. * `TensorRT` * [TensorRT 6.0](https://developer.nvidia.com/tensorrt#tensorrt-whats-new) is now supported and enabled by default. This adds support for more TensorFlow ops including Conv3D, Conv3DBackpropInputV2, AvgPool3D, MaxPool3D, ResizeBilinear, and ResizeNearestNeighbor. 
In addition, the TensorFlow-TensorRT python conversion API is exported as `tf.experimental.tensorrt.Converter`. + * Environment variable `TF_DETERMINISTIC_OPS` added. When set to "true" or "1", this environment variable makes `tf.nn.bias_add` operate deterministically (i.e. reproducibly). It also makes cuDNN convolution and max-pooling operate deterministically. This makes Keras Conv2D, Conv3D, MaxPool2D, and MaxPool3D layers operate deterministically in both the forward and backward directions. ## Known issues Because of [issues with building on windows](https://github.com/tensorflow/tensorflow/issues/10521), we turned off eigen strong inlining for the Windows builds. Windows binaries are expected to be slightly slower until the build issues are resolved. From 322e55bb71cfe915ba05fcf837805735a13c513c Mon Sep 17 00:00:00 2001 From: Mihai Maruseac Date: Fri, 6 Dec 2019 09:45:03 -0800 Subject: [PATCH 062/130] Add Estimator release notes. --- RELEASE.md | 1 + 1 file changed, 1 insertion(+) diff --git a/RELEASE.md b/RELEASE.md index 7b4f0f29f1a609..8ba82c38e2874a 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -41,6 +41,7 @@ Because of [issues with building on windows](https://github.com/tensorflow/tenso * Fix issue where GRU would crash or give incorrect output when a `tf.distribute.Strategy` was used. * `tf.estimator` * Added option in `tf.estimator.CheckpointSaverHook` to not save the `GraphDef`. + * Moving the checkpoint reader from swig to pybind11. * `tf.keras` * Export depthwise_conv2d in `tf.keras.backend`. * In Keras Layers and Models, Variables in `trainable_weights`, `non_trainable_weights`, and `weights` are explicitly deduplicated. From f2833a79ea4a01794ef5ebe72cdd22598b2db981 Mon Sep 17 00:00:00 2001 From: Duncan Riach Date: Fri, 6 Dec 2019 11:22:04 -0800 Subject: [PATCH 063/130] Enhance description of TF_DETERMINISTIC_OPS in version 2.1 release notes. 
--- RELEASE.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/RELEASE.md b/RELEASE.md index 3996280298d3db..b5c540f5b0db00 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -19,7 +19,7 @@ TensorFlow 2.1 will be the last TF release supporting Python 2. Python 2 support * Changes rebatching for `tf.data datasets` + distribution strategies for better performance. Note that the dataset also behaves slightly differently, in that the rebatched dataset cardinality will always be a multiple of the number of replicas. * `TensorRT` * [TensorRT 6.0](https://developer.nvidia.com/tensorrt#tensorrt-whats-new) is now supported and enabled by default. This adds support for more TensorFlow ops including Conv3D, Conv3DBackpropInputV2, AvgPool3D, MaxPool3D, ResizeBilinear, and ResizeNearestNeighbor. In addition, the TensorFlow-TensorRT python conversion API is exported as `tf.experimental.tensorrt.Converter`. - * Environment variable `TF_DETERMINISTIC_OPS` added. When set to "true" or "1", this environment variable makes `tf.nn.bias_add` operate deterministically (i.e. reproducibly). It also makes cuDNN convolution and max-pooling operate deterministically. This makes Keras Conv2D, Conv3D, MaxPool2D, and MaxPool3D layers operate deterministically in both the forward and backward directions. + * Environment variable `TF_DETERMINISTIC_OPS` added. When set to "true" or "1", this environment variable makes `tf.nn.bias_add` operate deterministically (i.e. reproducibly) when XLA JIT compilation is *not* enabled. It also makes cuDNN convolution and max-pooling operate deterministically. This makes Keras Conv*D and MaxPool*D layers operate deterministically in both the forward and backward directions when running on CUDA-enabled GPU. ## Known issues Because of [issues with building on windows](https://github.com/tensorflow/tensorflow/issues/10521), we turned off eigen strong inlining for the Windows builds. 
Windows binaries are expected to be slightly slower until the build issues are resolved. From 544e1a1a7f139938a4f398eeeaa9eb1c74b6e0aa Mon Sep 17 00:00:00 2001 From: Duncan Riach Date: Fri, 6 Dec 2019 11:25:24 -0800 Subject: [PATCH 064/130] Fix a small typo in version 2.1 release notes. --- RELEASE.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/RELEASE.md b/RELEASE.md index b5c540f5b0db00..b5f0886be86c24 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -19,7 +19,7 @@ TensorFlow 2.1 will be the last TF release supporting Python 2. Python 2 support * Changes rebatching for `tf.data datasets` + distribution strategies for better performance. Note that the dataset also behaves slightly differently, in that the rebatched dataset cardinality will always be a multiple of the number of replicas. * `TensorRT` * [TensorRT 6.0](https://developer.nvidia.com/tensorrt#tensorrt-whats-new) is now supported and enabled by default. This adds support for more TensorFlow ops including Conv3D, Conv3DBackpropInputV2, AvgPool3D, MaxPool3D, ResizeBilinear, and ResizeNearestNeighbor. In addition, the TensorFlow-TensorRT python conversion API is exported as `tf.experimental.tensorrt.Converter`. - * Environment variable `TF_DETERMINISTIC_OPS` added. When set to "true" or "1", this environment variable makes `tf.nn.bias_add` operate deterministically (i.e. reproducibly) when XLA JIT compilation is *not* enabled. It also makes cuDNN convolution and max-pooling operate deterministically. This makes Keras Conv*D and MaxPool*D layers operate deterministically in both the forward and backward directions when running on CUDA-enabled GPU. + * Environment variable `TF_DETERMINISTIC_OPS` added. When set to "true" or "1", this environment variable makes `tf.nn.bias_add` operate deterministically (i.e. reproducibly) when XLA JIT compilation is *not* enabled. It also makes cuDNN convolution and max-pooling operate deterministically. 
This makes Keras Conv*D and MaxPool*D layers operate deterministically in both the forward and backward directions when running on a CUDA-enabled GPU. ## Known issues Because of [issues with building on windows](https://github.com/tensorflow/tensorflow/issues/10521), we turned off eigen strong inlining for the Windows builds. Windows binaries are expected to be slightly slower until the build issues are resolved. From ac061f05c6ce78463787bcc821d5f86d77eff837 Mon Sep 17 00:00:00 2001 From: TensorFlow Release Automation Date: Fri, 6 Dec 2019 13:34:29 -0800 Subject: [PATCH 065/130] Update version numbers to 2.1.0-rc1 --- tensorflow/core/public/version.h | 2 +- tensorflow/tools/pip_package/setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h index 04f45726cbf1f8..feeaec99e3f3fd 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -26,7 +26,7 @@ limitations under the License. // TF_VERSION_SUFFIX is non-empty for pre-releases (e.g. "-alpha", "-alpha.1", // "-beta", "-rc", "-rc.1") -#define TF_VERSION_SUFFIX "-rc0" +#define TF_VERSION_SUFFIX "-rc1" #define TF_STR_HELPER(x) #x #define TF_STR(x) TF_STR_HELPER(x) diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py index 663afadaa7cb70..665a3825a2cc8e 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -47,7 +47,7 @@ # result for pip. # Also update tensorflow/tensorflow.bzl and # tensorflow/core/public/version.h -_VERSION = '2.1.0-rc0' +_VERSION = '2.1.0-rc1' REQUIRED_PACKAGES = [ 'absl-py >= 0.7.0', From 6aa176247e75e576506bf45f761549d1dab27c30 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Tue, 3 Dec 2019 14:44:49 -0800 Subject: [PATCH 066/130] Fix floating point golden test for sigmoid. 
PiperOrigin-RevId: 283626203 Change-Id: I64ed3747b40e18c09520556e642b5826367cbd4e --- tensorflow/python/keras/activations.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/keras/activations.py b/tensorflow/python/keras/activations.py index 6606f76f40d6d8..0e3d9ed7e3d690 100644 --- a/tensorflow/python/keras/activations.py +++ b/tensorflow/python/keras/activations.py @@ -260,9 +260,8 @@ def sigmoid(x): >>> a = tf.constant([-20, -1.0, 0.0, 1.0, 20], dtype = tf.float32) >>> b = tf.keras.activations.sigmoid(a) - >>> b.numpy() - array([0. , 0.26894143, 0.5 , 0.7310586 , 1. ], - dtype=float32) + >>> b.numpy() > 0.0 + array([False, True, True, True, True]) Arguments: x: Input tensor. From c669ae5a73825f4086568e0b45e8007099b3fb99 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 6 Dec 2019 11:42:25 -0800 Subject: [PATCH 067/130] Update Eigen to https://gitlab.com/libeigen/eigen/commit/4e696901f873a2347f76d931cf2f701e31e15d05 PiperOrigin-RevId: 284229330 Change-Id: I5cc4bbe373cfef69bc9664ed5c56b86dc71de6d1 --- .../eigen_tensor_reduced_instantiations_google.h | 2 -- .../optimized/eigen_tensor_reduced_instantiations_oss.h | 2 -- tensorflow/python/keras/activations.py | 4 ++-- tensorflow/workspace.bzl | 8 ++++---- 4 files changed, 6 insertions(+), 10 deletions(-) diff --git a/tensorflow/lite/kernels/internal/optimized/eigen_tensor_reduced_instantiations_google.h b/tensorflow/lite/kernels/internal/optimized/eigen_tensor_reduced_instantiations_google.h index de10f2c9259e99..1eb65c5bd5c9a4 100644 --- a/tensorflow/lite/kernels/internal/optimized/eigen_tensor_reduced_instantiations_google.h +++ b/tensorflow/lite/kernels/internal/optimized/eigen_tensor_reduced_instantiations_google.h @@ -91,7 +91,6 @@ typedef unsigned __int64 uint64_t; #include "third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h" #include "third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h" #include 
"third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceGpu.h" -#include "third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h" #include "third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h" #include "third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h" #include "third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h" @@ -149,7 +148,6 @@ typedef unsigned __int64 uint64_t; #include "third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h" #include "third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h" #include "third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorTrace.h" -#include "third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorSycl.h" #include "third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h" #include "third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h" #include "third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h" diff --git a/tensorflow/lite/kernels/internal/optimized/eigen_tensor_reduced_instantiations_oss.h b/tensorflow/lite/kernels/internal/optimized/eigen_tensor_reduced_instantiations_oss.h index 5b54024ac5a0e4..027dd479af5cf9 100644 --- a/tensorflow/lite/kernels/internal/optimized/eigen_tensor_reduced_instantiations_oss.h +++ b/tensorflow/lite/kernels/internal/optimized/eigen_tensor_reduced_instantiations_oss.h @@ -91,7 +91,6 @@ typedef unsigned __int64 uint64_t; #include "unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h" #include "unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h" #include "unsupported/Eigen/CXX11/src/Tensor/TensorDeviceGpu.h" -#include "unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h" #include "unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h" #include "unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h" #include "unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h" @@ -149,7 +148,6 @@ typedef unsigned 
__int64 uint64_t; #include "unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h" #include "unsupported/Eigen/CXX11/src/Tensor/TensorScan.h" #include "unsupported/Eigen/CXX11/src/Tensor/TensorTrace.h" -#include "unsupported/Eigen/CXX11/src/Tensor/TensorSycl.h" #include "unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h" #include "unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h" #include "unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h" diff --git a/tensorflow/python/keras/activations.py b/tensorflow/python/keras/activations.py index 0e3d9ed7e3d690..f56f1c7e2e7b0a 100644 --- a/tensorflow/python/keras/activations.py +++ b/tensorflow/python/keras/activations.py @@ -260,8 +260,8 @@ def sigmoid(x): >>> a = tf.constant([-20, -1.0, 0.0, 1.0, 20], dtype = tf.float32) >>> b = tf.keras.activations.sigmoid(a) - >>> b.numpy() > 0.0 - array([False, True, True, True, True]) + >>> b.numpy() >= 0.0 + array([ True, True, True, True, True]) Arguments: x: Input tensor. diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 47dfc9eb600f17..77e605fe76a6aa 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -171,11 +171,11 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""): name = "eigen_archive", build_file = clean_dep("//third_party:eigen.BUILD"), patch_file = clean_dep("//third_party/eigen3:gpu_packet_math.patch"), - sha256 = "091d1a3124ea41ac2e70e30028365d78d43a1c617a26445aef15e140e4fab1dd", - strip_prefix = "eigen-eigen-afc120bc03bd", + sha256 = "65d732985b593b553c20566e1f236f48dcc626730c418aed7b2aa1d0e3f1a0af", + strip_prefix = "eigen-4e696901f873a2347f76d931cf2f701e31e15d05", urls = [ - "https://storage.googleapis.com/mirror.tensorflow.org/bitbucket.org/eigen/eigen/get/afc120bc03bd.tar.gz", - "https://bitbucket.org/eigen/eigen/get/afc120bc03bd.tar.gz", + 
"https://storage.googleapis.com/mirror.tensorflow.org/gitlab.com/libeigen/eigen/-/archive/4e696901f873a2347f76d931cf2f701e31e15d05/eigen-4e696901f873a2347f76d931cf2f701e31e15d05.tar.gz", + "https://gitlab.com/libeigen/eigen/-/archive/4e696901f873a2347f76d931cf2f701e31e15d05/eigen-4e696901f873a2347f76d931cf2f701e31e15d05.tar.gz", ], ) From f6bd34fe1565cb340f7b892434982dd2f0900bda Mon Sep 17 00:00:00 2001 From: Ken Franko Date: Fri, 6 Dec 2019 14:31:23 -0800 Subject: [PATCH 068/130] Group variable initialization when calling lift_to_graph. When initializing variables defined inside a @tf.function which are lifted to the outer graph, group the variables together and call lift_to_graph once. lift_to_graph supports passing in multiple tensors and the graph to lift to is the same for all of the variable initialization. This improves setup time. PiperOrigin-RevId: 284263511 Change-Id: I4cfcdb0394198df8f890a98295cc2fcb77b75413 --- tensorflow/python/eager/def_function.py | 9 ++++++++- tensorflow/python/eager/def_function_test.py | 13 +++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/eager/def_function.py b/tensorflow/python/eager/def_function.py index e7b4a6f84b2a5a..47e482cc754b79 100644 --- a/tensorflow/python/eager/def_function.py +++ b/tensorflow/python/eager/def_function.py @@ -728,13 +728,20 @@ def initialize_variables(): resource_variable_ops.var_is_initialized_op(v.handle)) var_is_initialized = array_ops.stack(var_is_initialized).numpy() + inits = [] for (v, init), is_initialized in zip(initializers, var_is_initialized): with ops.init_scope(): if is_initialized: continue + inits.append(init) + if inits: op_map = lift_to_graph.lift_to_graph( - [init], ops.get_default_graph(), op_map=op_map) + inits, ops.get_default_graph(), op_map=op_map) + for (v, init), is_initialized in zip(initializers, var_is_initialized): + with ops.init_scope(): + if is_initialized: + continue v.assign(op_map[init], read_value=False) with 
ops.init_scope(): diff --git a/tensorflow/python/eager/def_function_test.py b/tensorflow/python/eager/def_function_test.py index a38ba73cae45fb..0bebc89d2207e7 100644 --- a/tensorflow/python/eager/def_function_test.py +++ b/tensorflow/python/eager/def_function_test.py @@ -137,6 +137,19 @@ def fn(x): self.assertAllEqual(fn(constant_op.constant(1.0)), 2.0) + def testFunctionMultipleVariableInitializer(self): + + state = [] + + @def_function.function + def fn(x): + if not state: + state.append(variables.Variable(lambda: 2.0)) + state.append(variables.Variable(lambda: 5.0)) + return state[0] * x, state[1] * x + + self.assertAllEqual(fn(constant_op.constant(1.0)), [2.0, 5.0]) + def testFunctionInitializationFunction(self): state = [] From 98b1957300883d8680be2d233987bc9d7be1321f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 6 Dec 2019 14:22:22 -0800 Subject: [PATCH 069/130] Override EIGEN strong inline for release builds as well. PiperOrigin-RevId: 284261705 Change-Id: I882c786169fb2d51716c884c9b1c91b59ae2df4e --- tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh | 2 +- tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh b/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh index 223c10fb0256dc..a64d5ef9c9c7d8 100644 --- a/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh +++ b/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh @@ -104,7 +104,7 @@ if [[ "$RELEASE_BUILD" == 1 ]]; then # Overriding eigen strong inline speeds up the compiling of conv_grad_ops_3d.cc and conv_ops_3d.cc # by 20 minutes. See https://github.com/tensorflow/tensorflow/issues/10521 # Because this hurts the performance of TF, we don't override it in release build. 
- export TF_OVERRIDE_EIGEN_STRONG_INLINE=1 + export TF_OVERRIDE_EIGEN_STRONG_INLINE=0 else export TF_OVERRIDE_EIGEN_STRONG_INLINE=1 fi diff --git a/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh b/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh index 7cf23775b649d3..299cbe32260e52 100644 --- a/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh +++ b/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh @@ -104,7 +104,7 @@ if [[ "$RELEASE_BUILD" == 1 ]]; then # Overriding eigen strong inline speeds up the compiling of conv_grad_ops_3d.cc and conv_ops_3d.cc # by 20 minutes. See https://github.com/tensorflow/tensorflow/issues/10521 # Because this hurts the performance of TF, we don't override it in release build. - export TF_OVERRIDE_EIGEN_STRONG_INLINE=1 + export TF_OVERRIDE_EIGEN_STRONG_INLINE=0 else export TF_OVERRIDE_EIGEN_STRONG_INLINE=1 fi From 9b8b4c71a661c1f9f4b150e9a1545d559383d60c Mon Sep 17 00:00:00 2001 From: Goldie Gadde Date: Fri, 6 Dec 2019 17:36:46 -0800 Subject: [PATCH 070/130] Update setup.py --- tensorflow/tools/pip_package/setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py index 665a3825a2cc8e..f4afd1ad352b90 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -61,8 +61,8 @@ 'numpy >= 1.16.0, < 2.0', 'opt_einsum >= 2.3.2', 'protobuf >= 3.8.0', - 'tensorboard >= 2.0.0, < 2.1.0', - 'tensorflow_estimator >= 2.0.0, < 2.1.0', + 'tensorboard >= 2.1.0, < 2.2.0', + 'tensorflow_estimator >= 2.1.0rc0, < 2.2.0', 'termcolor >= 1.1.0', 'wrapt >= 1.11.1', # python3 requires wheel 0.26 From 8bdf0b9f8ed3ef80f847f8b9cff931c907622170 Mon Sep 17 00:00:00 2001 From: Goldie Gadde Date: Fri, 6 Dec 2019 17:38:25 -0800 Subject: [PATCH 071/130] Update setup.py --- tensorflow/tools/pip_package/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py index f4afd1ad352b90..d4eb716d989ba7 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -62,7 +62,7 @@ 'opt_einsum >= 2.3.2', 'protobuf >= 3.8.0', 'tensorboard >= 2.1.0, < 2.2.0', - 'tensorflow_estimator >= 2.1.0rc0, < 2.2.0', + 'tensorflow_estimator >= 2.1.0, < 2.2.0', 'termcolor >= 1.1.0', 'wrapt >= 1.11.1', # python3 requires wheel 0.26 From 9c689cbd2959bf0c7910c87e3e0a2dec10148aa9 Mon Sep 17 00:00:00 2001 From: Christian Sigg Date: Mon, 9 Dec 2019 13:16:00 +0100 Subject: [PATCH 072/130] Add release note about CUDA 10.1 and cuDNN 7.6 --- RELEASE.md | 1 + 1 file changed, 1 insertion(+) diff --git a/RELEASE.md b/RELEASE.md index 8ba82c38e2874a..27eb30047dc556 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -17,6 +17,7 @@ TensorFlow 2.1 will be the last TF release supporting Python 2. Python 2 support * Keras reference implementations for many popular models are available in the TensorFlow [Model Garden](https://github.com/tensorflow/models/tree/master/official). * `tf.data` * Changes rebatching for `tf.data datasets` + distribution strategies for better performance. Note that the dataset also behaves slightly differently, in that the rebatched dataset cardinality will always be a multiple of the number of replicas. +* The `tensorflow` pip package is built with CUDA 10.1 and cuDNN 7.6. * `TensorRT` * [TensorRT 6.0](https://developer.nvidia.com/tensorrt#tensorrt-whats-new) is now supported and enabled by default. This adds support for more TensorFlow ops including Conv3D, Conv3DBackpropInputV2, AvgPool3D, MaxPool3D, ResizeBilinear, and ResizeNearestNeighbor. In addition, the TensorFlow-TensorRT python conversion API is exported as `tf.experimental.tensorrt.Converter`. From 99d7cf3e4506150fc34b278a70057ed8f3ecc279 Mon Sep 17 00:00:00 2001 From: Goldie Gadde Date: Mon, 9 Dec 2019 09:29:25 -0800 Subject: [PATCH 073/130] Fix the estimator version. 
--- tensorflow/tools/pip_package/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py index d4eb716d989ba7..f4afd1ad352b90 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -62,7 +62,7 @@ 'opt_einsum >= 2.3.2', 'protobuf >= 3.8.0', 'tensorboard >= 2.1.0, < 2.2.0', - 'tensorflow_estimator >= 2.1.0, < 2.2.0', + 'tensorflow_estimator >= 2.1.0rc0, < 2.2.0', 'termcolor >= 1.1.0', 'wrapt >= 1.11.1', # python3 requires wheel 0.26 From 2e01ee17895806c0593b3308b976298835b20027 Mon Sep 17 00:00:00 2001 From: Goldie Gadde Date: Mon, 9 Dec 2019 09:42:39 -0800 Subject: [PATCH 074/130] Update RELEASE.md --- RELEASE.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/RELEASE.md b/RELEASE.md index 27eb30047dc556..2f1ea0880de711 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -4,6 +4,7 @@ TensorFlow 2.1 will be the last TF release supporting Python 2. Python 2 support ## Major Features and Improvements * The `tensorflow` pip package now includes GPU support by default (same as `tensorflow-gpu`) for both Linux and Windows. This runs on machines with and without NVIDIA GPUs. `tensorflow-gpu` is still available, and CPU-only packages can be downloaded at `tensorflow-cpu` for users who are concerned about package size. +* The `tensorflow` pip package is built with CUDA 10.1 and cuDNN 7.6. * `tf.keras` * `Model.fit_generator`, `Model.evaluate_generator`, `Model.predict_generator`, `Model.train_on_batch`, `Model.test_on_batch`, and `Model.predict_on_batch` methods now respect the `run_eagerly` property, and will correctly run using tf.function by default. * `Model.fit_generator`, `Model.evaluate_generator`, and `Model.predict_generator` are deprecated endpoints. They are subsumed by `Model.fit`, `Model.evaluate`, and `Model.predict` which now support generators and Sequences. 
@@ -17,7 +18,6 @@ TensorFlow 2.1 will be the last TF release supporting Python 2. Python 2 support * Keras reference implementations for many popular models are available in the TensorFlow [Model Garden](https://github.com/tensorflow/models/tree/master/official). * `tf.data` * Changes rebatching for `tf.data datasets` + distribution strategies for better performance. Note that the dataset also behaves slightly differently, in that the rebatched dataset cardinality will always be a multiple of the number of replicas. -* The `tensorflow` pip package is built with CUDA 10.1 and cuDNN 7.6. * `TensorRT` * [TensorRT 6.0](https://developer.nvidia.com/tensorrt#tensorrt-whats-new) is now supported and enabled by default. This adds support for more TensorFlow ops including Conv3D, Conv3DBackpropInputV2, AvgPool3D, MaxPool3D, ResizeBilinear, and ResizeNearestNeighbor. In addition, the TensorFlow-TensorRT python conversion API is exported as `tf.experimental.tensorrt.Converter`. From 34fbff6964e437096d71832577ac5d5a7948e2e8 Mon Sep 17 00:00:00 2001 From: Taylor Robie Date: Fri, 6 Dec 2019 12:58:13 -0800 Subject: [PATCH 075/130] Update keras standardization code to error out when a namedtuple is encountered. 
PiperOrigin-RevId: 284244075 Change-Id: I2bee4628df9e0e7cbc0fde126d99020698731fa6 --- .../python/keras/engine/data_adapter.py | 17 +++ tensorflow/python/keras/engine/training.py | 39 ++++++ .../python/keras/engine/training_test.py | 121 ++++++++++++++++++ tensorflow/python/keras/engine/training_v2.py | 6 + 4 files changed, 183 insertions(+) diff --git a/tensorflow/python/keras/engine/data_adapter.py b/tensorflow/python/keras/engine/data_adapter.py index 1474cf7a127cca..50db978e77ad2b 100644 --- a/tensorflow/python/keras/engine/data_adapter.py +++ b/tensorflow/python/keras/engine/data_adapter.py @@ -19,6 +19,7 @@ from __future__ import print_function import abc +import collections import itertools import math import random @@ -744,6 +745,7 @@ def __init__(self, x, y=None, sample_weights=None, standardize_function=None, # Since we have to know the dtype of the python generator when we build the # dataset, we have to look at a batch to infer the structure. peek, x = self._peek_and_restore(x) + assert_not_namedtuple(peek) (peek, wrap_in_tuple, elements_to_keep, partial_sample_weight, sample_weight_modes, nested_shape, nested_dtypes @@ -1093,3 +1095,18 @@ def broadcast_sample_weight_modes(target_structure, sample_weight_modes): "structure:\n {}\n to \n {}".format(target_str, mode_str)) return sample_weight_modes + + +def assert_not_namedtuple(x): + if (isinstance(x, tuple) and + # TODO(b/144192902): Use a namedtuple checking utility. + hasattr(x, "_fields") and + isinstance(x._fields, collections.Sequence) and + all(isinstance(f, six.string_types) for f in x._fields)): + raise ValueError( + "Received namedtuple ({}) with fields `{}` as input. namedtuples " + "cannot, in general, be unambiguously resolved into `x`, `y`, " + "and `sample_weight`. For this reason Keras has elected not to " + "support them. 
If you would like the value to be unpacked, " + "please explicitly convert it to a tuple before passing it to " + "Keras.".format(x.__class__, x._fields)) diff --git a/tensorflow/python/keras/engine/training.py b/tensorflow/python/keras/engine/training.py index eef8ad84d548a3..8e36b8a9e358dc 100644 --- a/tensorflow/python/keras/engine/training.py +++ b/tensorflow/python/keras/engine/training.py @@ -628,6 +628,8 @@ def fit(self, `(inputs, targets, sample_weights)`. - A generator or `keras.utils.Sequence` returning `(inputs, targets)` or `(inputs, targets, sample weights)`. + A more detailed description of unpacking behavior for iterator types + (Dataset, generator, Sequence) is given below. y: Target data. Like the input data `x`, it could be either Numpy array(s) or TensorFlow tensor(s). It should be consistent with `x` (you cannot have Numpy inputs and @@ -748,6 +750,30 @@ def fit(self, the generator as they can't be passed easily to children processes. **kwargs: Used for backwards compatibility. + Unpacking behavior for iterator-like inputs: + A common pattern is to pass a tf.data.Dataset, generator, or + tf.keras.utils.Sequence to the `x` argument of fit, which will in fact + yield not only features (x) but optionally targets (y) and sample weights. + Keras requires that the output of such iterator-likes be unambiguous. The + iterator should return a tuple of length 1, 2, or 3, where the optional + second and third elements will be used for y and sample_weight + respectively. Any other type provided will be wrapped in a length one + tuple, effectively treating everything as 'x'. When yielding dicts, they + should still adhere to the top-level tuple structure. + e.g. `({"x0": x0, "x1": x1}, y)`. Keras will not attempt to separate + features, targets, and weights from the keys of a single dict. + A notable unsupported data type is the namedtuple. The reason is that + it behaves like both an ordered datatype (tuple) and a mapping + datatype (dict). 
So given a namedtuple of the form: + `namedtuple("example_tuple", ["y", "x"])` + it is ambiguous whether to reverse the order of the elements when + interpreting the value. Even worse is a tuple of the form: + `namedtuple("other_tuple", ["x", "y", "z"])` + where it is unclear if the tuple was intended to be unpacked into x, y, + and sample_weight or passed through as a single element to `x`. As a + result the data processing code will simply raise a ValueError if it + encounters a namedtuple. (Along with instructions to remedy the issue.) + Returns: A `History` object. Its `History.history` attribute is a record of training loss values and metrics values @@ -817,6 +843,9 @@ def evaluate(self, if the model has named inputs. - A `tf.data` dataset. - A generator or `keras.utils.Sequence` instance. + A more detailed description of unpacking behavior for iterator types + (Dataset, generator, Sequence) is given in the `Unpacking behavior + for iterator-like inputs` section of `Model.fit`. y: Target data. Like the input data `x`, it could be either Numpy array(s) or TensorFlow tensor(s). It should be consistent with `x` (you cannot have Numpy inputs and @@ -870,6 +899,9 @@ def evaluate(self, multiprocessing, you should not pass non-picklable arguments to the generator as they can't be passed easily to children processes. + See the discussion of `Unpacking behavior for iterator-like inputs` for + `Model.fit`. + Returns: Scalar test loss (if the model has a single output and no metrics) or list of scalars (if the model has multiple outputs @@ -918,6 +950,9 @@ def predict(self, (in case the model has multiple inputs). - A `tf.data` dataset. - A generator or `keras.utils.Sequence` instance. + A more detailed description of unpacking behavior for iterator types + (Dataset, generator, Sequence) is given in the `Unpacking behavior + for iterator-like inputs` section of `Model.fit`. batch_size: Integer or `None`. Number of samples per gradient update. 
If unspecified, `batch_size` will default to 32. @@ -948,6 +983,10 @@ def predict(self, multiprocessing, you should not pass non-picklable arguments to the generator as they can't be passed easily to children processes. + See the discussion of `Unpacking behavior for iterator-like inputs` for + `Model.fit`. Note that Model.predict uses the same interpretation rules as + `Model.fit` and `Model.evaluate`, so inputs must be unambiguous for all + three methods. Returns: Numpy array(s) of predictions. diff --git a/tensorflow/python/keras/engine/training_test.py b/tensorflow/python/keras/engine/training_test.py index e67bd7b5084d90..10e1190ed41542 100644 --- a/tensorflow/python/keras/engine/training_test.py +++ b/tensorflow/python/keras/engine/training_test.py @@ -18,8 +18,10 @@ from __future__ import division from __future__ import print_function +import collections import io import logging +import re import sys from absl.testing import parameterized @@ -753,6 +755,125 @@ def test_evaluate_predict_on_arrays(self): }) self.assertEqual(len(out), 2) + def _make_sequence_input_functions(self, input_type): + # train and test + xy_namedtuple = collections.namedtuple('xy_namedtuple', ['x', 'y']) + + # predict + x_namedtuple = collections.namedtuple('x_namedtuple', ['x']) + + if input_type == 'dataset': + dataset = dataset_ops.Dataset.range(16).map( + lambda _: array_ops.ones(shape=(1,))) + + xy_dataset = dataset_ops.Dataset.zip((dataset, dataset)).batch(4) + x_dataset = dataset.batch(4) + def xy_function(use_namedtuple): + return xy_dataset.map(xy_namedtuple) if use_namedtuple else xy_dataset + + def x_function(use_namedtuple): + return x_dataset.map(x_namedtuple) if use_namedtuple else x_dataset + + return xy_function, x_function + + elif input_type == 'generator': + def xy_generator(use_namedtuple): + x, y = np.ones((4, 1)), np.ones((4, 1)) + for _ in range(4): + if use_namedtuple: + yield xy_namedtuple(x, y) + else: + yield x, y + + def x_generator(use_namedtuple): + x = 
np.ones((4, 1)) + for _ in range(4): + if use_namedtuple: + yield x_namedtuple(x) + else: + yield x + + return xy_generator, x_generator + + elif input_type == 'sequence': + class XYSequence(data_utils.Sequence): + + def __init__(self, use_namedtuple): + self._use_namedtuple = use_namedtuple + super(XYSequence, self).__init__() + + def __getitem__(self, idx): + x, y = np.ones((4, 1)), np.ones((4, 1)) + if self._use_namedtuple: + return xy_namedtuple(x, y) + return x, y + + def __len__(self): + return 4 + + class XSequence(data_utils.Sequence): + + def __init__(self, use_namedtuple): + self._use_namedtuple = use_namedtuple + super(XSequence, self).__init__() + + def __getitem__(self, idx): + x = np.ones((4, 1)) + if self._use_namedtuple: + return x_namedtuple(x) + return x + + def __len__(self): + return 4 + + return XYSequence, XSequence + + @keras_parameterized.run_all_keras_modes(always_skip_v1=True) + @keras_parameterized.run_with_all_model_types + @parameterized.named_parameters( + ('dataset', 'dataset'), + ('generator', 'generator'), + ('sequence', 'sequence'), + ) + def test_sequence_input_types(self, input_type): + """Ensure that namedtuples and tuples are plumbed identically.""" + if not testing_utils.should_run_tf_function(): + self.skipTest('Improved checking is only present in data_adapter.') + + xy_function, x_function = self._make_sequence_input_functions(input_type) + fit_kwargs, evaluate_kwargs, predict_kwargs = {}, {}, {} + if input_type == 'generator': + fit_kwargs['steps_per_epoch'] = 4 + evaluate_kwargs['steps'] = 4 + predict_kwargs['steps'] = 4 + + model = testing_utils.get_small_mlp(1, 1, 1) + model.compile( + loss='mse', + optimizer='sgd', + run_eagerly=testing_utils.should_run_eagerly(), + experimental_run_tf_function=testing_utils.should_run_tf_function()) + + model.fit(xy_function(use_namedtuple=False), **fit_kwargs) + model.evaluate(xy_function(use_namedtuple=False), **evaluate_kwargs) + model.predict(x_function(use_namedtuple=False), 
**predict_kwargs) + + xy_pattern = re.escape( + "Received namedtuple () with fields " + "`('x', 'y')` as input.") + x_pattern = re.escape( + "Received namedtuple () with fields " + "`('x',)` as input.") + + with self.assertRaisesRegex(ValueError, xy_pattern): + model.fit(xy_function(use_namedtuple=True), **fit_kwargs) + + with self.assertRaisesRegex(ValueError, xy_pattern): + model.evaluate(xy_function(use_namedtuple=True), **evaluate_kwargs) + + with self.assertRaisesRegex(ValueError, x_pattern): + model.predict(x_function(use_namedtuple=True), **predict_kwargs) + @keras_parameterized.run_all_keras_modes @keras_parameterized.run_with_all_model_types def test_activity_regularizer_fit(self): diff --git a/tensorflow/python/keras/engine/training_v2.py b/tensorflow/python/keras/engine/training_v2.py index 476da84bcf7be5..3aeccb2171e9a7 100644 --- a/tensorflow/python/keras/engine/training_v2.py +++ b/tensorflow/python/keras/engine/training_v2.py @@ -662,6 +662,12 @@ def standardize_function(dataset): # Then we map using only the tensor standardization portion. def map_fn(x, y=None, sample_weights=None): """Tensor manipulation portion of standardization for Dataset.map.""" + if (y is None and sample_weights is None): + # namedtuples are forbidden because it is ambiguous if they should be + # unpacked. If y or sample_weights is present then `x` was not the + # top level structure, and the correct behavior is unambiguous. + data_adapter.assert_not_namedtuple(x) + standardized = model._standardize_tensors( x, y, sample_weights, run_eagerly=False, From 7fe8e2dd69b26fe5a01a748ca168324c3c8deebd Mon Sep 17 00:00:00 2001 From: Katherine Wu Date: Fri, 6 Dec 2019 16:22:55 -0800 Subject: [PATCH 076/130] Correct number of output shapes written by SavedModel. After backpropagation rewrite, functions are rewritten with additional outputs. When exporting to SavedModel, the shapes of the additional outputs are incorrectly added to the FunctionDef proto. 
This CL excludes the extra output shapes from being added to the SavedModel. PiperOrigin-RevId: 284284777 Change-Id: I553ed5117d99bdbef1de169f2406a7c5b1153190 --- tensorflow/python/framework/ops.py | 10 ++++- tensorflow/python/saved_model/save_test.py | 47 ++++++++++++++++++++++ 2 files changed, 56 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index 8a273e834be1b7..5a6991f931ea62 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -3092,8 +3092,16 @@ def _as_graph_def(self, from_version=None, add_shapes=False): op = func_graph.get_operation_by_name(node.name) except KeyError: continue + outputs = op.outputs + + if op.type == "StatefulPartitionedCall": + # Filter out any extra outputs (possibly added by function + # backpropagation rewriting). + num_outputs = len(node.attr["Tout"].list.type) + outputs = outputs[:num_outputs] + node.attr["_output_shapes"].list.shape.extend( - [output.get_shape().as_proto() for output in op.outputs]) + [output.get_shape().as_proto() for output in outputs]) return graph, self._version diff --git a/tensorflow/python/saved_model/save_test.py b/tensorflow/python/saved_model/save_test.py index e178c362d04637..8662cbaea518c7 100644 --- a/tensorflow/python/saved_model/save_test.py +++ b/tensorflow/python/saved_model/save_test.py @@ -442,6 +442,53 @@ def f(unused_v): save.save(root, os.path.join(self.get_temp_dir(), "saved_model"), signatures=root.f) + def test_export_correct_output_shapes(self): + """Asserts that nodes are exported with the correct number of output shapes. + + After backpropagation rewrite, functions are rewritten with additional + outputs. When exporting to SavedModel, the shapes of the additional outputs + were incorrectly added to the FunctionDef proto (b/133666530). + """ + obj = tracking.AutoTrackable() + obj.v = variables.Variable(2.) 
+ + @def_function.function(input_signature=[ + tensor_spec.TensorSpec(None, dtypes.float32)]) + def f(x): + return (math_ops.multiply(obj.v, x), + math_ops.multiply(obj.v, (x+1)), + None) + obj.f = f + + @def_function.function(input_signature=[ + tensor_spec.TensorSpec(None, dtypes.float32)]) + def g(x): + return obj.f(x)[1] + obj.g = g + + # After the following lines, the concrete functions of obj.g and obj.f are + # rewritten with many extra outputs. + with backprop.GradientTape(): + obj.g(constant_op.constant(3.0)) + + save_dir = os.path.join(self.get_temp_dir(), "saved_model") + save.save(obj, save_dir, signatures={"g": obj.g}) + graph_def = loader_impl.parse_saved_model(save_dir).meta_graphs[0].graph_def + + def assert_correct_number_of_output_shapes(node): + if node.op == "StatefulPartitionedCall": + fn_name = node.attr["f"].func.name + if fn_name.startswith("__inference_f"): + self.assertLen(node.attr["_output_shapes"].list.shape, 2) + if fn_name.startswith("__inference_g"): + self.assertLen(node.attr["_output_shapes"].list.shape, 1) + + for f in graph_def.library.function: + if(f.signature.name.startswith("__inference_f") or + f.signature.name.startswith("__inference_g")): + for node in f.node_def: + assert_correct_number_of_output_shapes(node) + class SavingOptionsTest(test.TestCase): From ae387b0e02531b16d71eecf746f2833af5c4c350 Mon Sep 17 00:00:00 2001 From: rxsang Date: Mon, 9 Dec 2019 10:36:00 -0800 Subject: [PATCH 077/130] Throw an explicit error if calling TPUStrategy in eager mode --- tensorflow/python/distribute/tpu_strategy.py | 60 ++++++++++++++++++-- 1 file changed, 55 insertions(+), 5 deletions(-) diff --git a/tensorflow/python/distribute/tpu_strategy.py b/tensorflow/python/distribute/tpu_strategy.py index 34c9578c65be73..bf5d4baba2c27f 100644 --- a/tensorflow/python/distribute/tpu_strategy.py +++ b/tensorflow/python/distribute/tpu_strategy.py @@ -37,6 +37,7 @@ from tensorflow.python.distribute.cluster_resolver import TPUClusterResolver from 
tensorflow.python.eager import context from tensorflow.python.eager import def_function +from tensorflow.python.eager import function from tensorflow.python.framework import constant_op from tensorflow.python.framework import device_spec from tensorflow.python.framework import dtypes @@ -82,6 +83,29 @@ def maybe_init_scope(): yield +def validate_experimental_run_function(fn): + """Validate the function passed into strategy.experimental_run_v2.""" + + # We allow three types of functions/objects passed into TPUStrategy + # experimental_run_v2 in eager mode: + # 1. a user annotated tf.function + # 2. a ConcreteFunction, this is mostly what you get from loading a saved + # model. + # 3. a callable object and the `__call__` method itself is a tf.function. + # + # Otherwise we return an error, because we don't support eagerly running + # experimental_run_v2 in TPUStrategy. + + if context.executing_eagerly() and not isinstance( + fn, def_function.Function) and not isinstance( + fn, function.ConcreteFunction) and not (callable(fn) and isinstance( + fn.__call__, def_function.Function)): + raise NotImplementedError( + "TPUStrategy.experimental_run_v2(fn, ...) does not support eager " + "execution. Either convert `fn` into a tf.function or consider " + "calling strategy.experimental_run_v2 inside a tf.function.") + + @tf_export("distribute.experimental.TPUStrategy", v1=[]) class TPUStrategy(distribute_lib.Strategy): """TPU distribution strategy implementation.""" @@ -89,14 +113,36 @@ class TPUStrategy(distribute_lib.Strategy): def __init__(self, tpu_cluster_resolver=None, device_assignment=None): - """Initializes the TPUStrategy object. + """Synchronous training in TPU donuts or Pods. 
+ + To construct a TPUStrategy object, you need to run the + initialization code as below: + + ```python + resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu=FLAGS.tpu) + tf.config.experimental_connect_to_cluster(resolver) + tf.tpu.experimental.initialize_tpu_system(resolver) + strategy = tf.distribute.experimental.TPUStrategy(resolver) + ``` + + While using distribution strategies, the variables created within strategy's + scope will be replicated across all the replicas and can be kept in sync + using all-reduce algorithms. + + To run TF2 programs on TPUs, you can either use `.compile` and + `.fit` APIs in `tf.keras` with TPUStrategy, or write your own customized + training loop by calling `strategy.experimental_run_v2` directly. Note that + TPUStrategy doesn't support pure eager execution, so please make sure the + function passed into `strategy.experimental_run_v2` is a `tf.function` or + `strategy.experimental_run_v2` is called inside a `tf.function` if running + in eager mode. Args: tpu_cluster_resolver: A tf.distribute.cluster_resolver.TPUClusterResolver, - which provides information about the TPU cluster. + which provides information about the TPU cluster. device_assignment: Optional `tf.tpu.experimental.DeviceAssignment` to - specify the placement of replicas on the TPU cluster. Currently only - supports the usecase of using a single core within a TPU cluster. + specify the placement of replicas on the TPU cluster. Currently only + supports the usecase of using a single core within a TPU cluster. """ super(TPUStrategy, self).__init__(TPUExtended( self, tpu_cluster_resolver, device_assignment=device_assignment)) @@ -111,6 +157,8 @@ def __init__(self, # This implementation runs a single step. It does not use infeed or outfeed. 
def experimental_run_v2(self, fn, args=(), kwargs=None): """See base class.""" + validate_experimental_run_function(fn) + # Note: the target function is converted to graph even when in Eager mode, # so autograph is on by default here. fn = autograph.tf_convert(fn, ag_ctx.control_status_ctx()) @@ -156,6 +204,8 @@ def steps_per_run(self): # can use the default implementation. # This implementation runs a single step. It does not use infeed or outfeed. def experimental_run_v2(self, fn, args=(), kwargs=None): + validate_experimental_run_function(fn) + """See base class.""" fn = autograph.tf_convert(fn, ag_ctx.control_status_ctx()) return self.extended.tpu_run(fn, args, kwargs) @@ -699,7 +749,7 @@ def replicated_fn(replica_id, replica_args, replica_kwargs): ] # Workaround for `tpu.replicate` behaviour when single `Tensor` returned. - if result[0] is None: + if result[0] is None or isinstance(result[0], ops.Operation): replicate_outputs = [None] * len(replicate_outputs) else: replicate_outputs = [ From 92617006d1c63ef5b1e00802c361ac7b39d84d1f Mon Sep 17 00:00:00 2001 From: Reed Wanderman-Milne Date: Thu, 5 Dec 2019 21:29:15 -0800 Subject: [PATCH 078/130] Support DistributionStrategy in LossScaleGradientTape, take 2. I previously tried this in de0be0deae93ea4c4452ceb23c91dd24a88fe62e, but it was rolled back due to breaking Windows. Autograph was causing an ImportError on Windows, so I now explicitly use a tf.while_loop. 
PiperOrigin-RevId: 284116353 Change-Id: Ia5ef17ae8ddf36af3244c157ebc0ecbd807eccb0 --- tensorflow/python/BUILD | 12 +- .../loss_scaling_gradient_tape.py | 107 +++++- .../loss_scaling_gradient_tape_test.py | 353 +++++++++++++----- 3 files changed, 355 insertions(+), 117 deletions(-) diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index f4f3fa7b1b163f..bf8ad490dca464 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -3551,24 +3551,26 @@ py_library( ":loss_scale", ":unconnected_gradients", ":util", + "//tensorflow/python/distribute:distribute_lib", "//tensorflow/python/eager:backprop", ], ) -py_test( +cuda_py_test( name = "loss_scaling_gradient_tape_test", size = "medium", srcs = ["training/experimental/loss_scaling_gradient_tape_test.py"], - python_version = "PY3", - deps = [ + additional_deps = [ ":client_testlib", ":constant_op", + ":framework_test_combinations_lib", ":loss_scale", ":loss_scaling_gradient_tape", + "@absl_py//absl/testing:parameterized", + "//third_party/py/numpy", "//tensorflow/python/compat:v2_compat", + "//tensorflow/python/distribute:mirrored_strategy", "//tensorflow/python/eager:def_function", - "//third_party/py/numpy", - "@absl_py//absl/testing:parameterized", ], ) diff --git a/tensorflow/python/training/experimental/loss_scaling_gradient_tape.py b/tensorflow/python/training/experimental/loss_scaling_gradient_tape.py index 4b75a74bc3b2a6..caae7052b84a70 100644 --- a/tensorflow/python/training/experimental/loss_scaling_gradient_tape.py +++ b/tensorflow/python/training/experimental/loss_scaling_gradient_tape.py @@ -18,8 +18,10 @@ from __future__ import division from __future__ import print_function +from tensorflow.python.distribute import distribution_strategy_context from tensorflow.python.eager import backprop -from tensorflow.python.ops import array_ops +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import math_ops from tensorflow.python.ops.unconnected_gradients import 
UnconnectedGradients from tensorflow.python.training.experimental import loss_scale as loss_scale_module from tensorflow.python.util import nest @@ -60,6 +62,13 @@ class LossScaleGradientTape(backprop.GradientTape): grads = tape.gradient(loss, vars) opt.apply_gradients(zip(grads, vars)) ``` + + WARNING: Computing second-order (or higher) gradients with a + `LossScaleGradientTape` does not yet work properly when a + `tf.distribute.Strategy` is used. Computing second-order gradients will return + None instead of the gradient tensors. This only occurs when you nest multiple + gradient tapes under each other; if you do not nest them, this issue will not + occur. """ def __init__(self, @@ -133,22 +142,90 @@ def gradient(self, if self._tape is None: # pylint: disable=access-member-before-definition raise RuntimeError("GradientTape.gradient can only be called once on " "non-persistent tapes.") + if distribution_strategy_context.in_cross_replica_context(): + raise ValueError("LossScaleGradientTape.gradient() must be called in a " + "replica context.") + + # Note: DistributionStrategy does not support running a while loop in a + # replica context. So, we call `_compute_gradients_until_finite` in a cross- + # replica context. 
+ replica_context = distribution_strategy_context.get_replica_context() + grads = replica_context.merge_call( + _compute_gradients_until_finite, + args=(self, self._loss_scale, target, sources, output_gradients, + unconnected_gradients)) - ready_to_update = False - grads = nest.map_structure(array_ops.zeros_like, sources) + if not self._outer_persistent: + self._tape = None # free up resources if a persistent tape was not needed + return grads - while not ready_to_update and self._loss_scale() > 1: - with self: # re-enter the gradient tape so it sees the loss scaling - loss_scale = self._loss_scale() - scaled_target = nest.map_structure(lambda t: t * loss_scale, target) - old_grads = super(LossScaleGradientTape, self).gradient( +def _compute_gradients_until_finite( + distribution, loss_scale_gradient_tapes, loss_scale, target, sources, + output_gradients, unconnected_gradients): + """Compute gradients and update the loss scale until the gradients are finite. + + This must be called in a cross-replica context. + + This is a function instead of a method of LossScaleGradientTape, as the `self` + parameter would be meaningless. There is one LossScaleGradientTape per + replica, but this function is called once total (not per replica), so there + cannot be a singular `self` parameter. + + Args: + distribution: The distribution strategy in effect. + loss_scale_gradient_tapes: A PerReplica value of LossScaleGradientTapes. + Contains the LossScaleGradientTape of each replica. + loss_scale: The loss scale to use to scale the loss and unscale the + gradient. + target: a list or nested structure of Tensors or Variables to be + differentiated. + sources: a list or nested structure of Tensors or Variables. `target` will + be differentiated against elements in `sources`. + output_gradients: Passed to GradientTape.gradient + unconnected_gradients: Pass to GradientTape.gradient. + + Returns: + The gradients of `target` with respect to `sources`. 
+ """ + # Autograph cannot convert this function, so we must use an explicit + # tf.while_loop. + # TODO(b/143572314): Fix Autograph so that it can convert this function, then + # replace the tf.while_loop with a Python while loop. + + def cond(grads, ready_to_update): + """The condition of the while loop.""" + del grads + # Equivalent to: `not ready_to_update and loss_scale() > 1` + return math_ops.logical_and(math_ops.logical_not(ready_to_update), + math_ops.greater(loss_scale(), 1)) + + def body(grads, ready_to_update): + """The body of the while loop.""" + del grads, ready_to_update + def replica_fn(gradient_tape, target, sources, output_gradients): + """Scales the loss, computes the gradients, and unscales the gradients.""" + loss_scale_val = loss_scale() + with gradient_tape: # re-enter gradient tape so it sees the loss scaling + scaled_target = nest.map_structure(lambda t: t * loss_scale_val, target) + old_grads = super(LossScaleGradientTape, gradient_tape).gradient( scaled_target, sources, output_gradients, unconnected_gradients) - inv_loss_scale = 1.0 / self._loss_scale() + inv_loss_scale = 1.0 / loss_scale_val grads = nest.map_structure(lambda g: inv_loss_scale * g, old_grads) - # Check for non-finite gradients possibly resulting from scaling - _, ready_to_update = self._loss_scale.update(grads) - - if not self._outer_persistent: - self._tape = None # free up resources if a persistent tape was not needed - return grads + return grads + + # Switch to a replica-context to compute gradients once per replica. + grads = distribution.experimental_run_v2( + replica_fn, args=(loss_scale_gradient_tapes, target, sources, + output_gradients)) + # Check for non-finite gradients possibly resulting from scaling + _, ready_to_update = loss_scale.update(grads) + return grads, ready_to_update + + # Dummy value for initial_grads. The first iteration of the loop will + # overwrite `grads` to the actual gradients. 
+ initial_grads = sources + initial_ready_to_update = False + grads, _ = control_flow_ops.while_loop( + cond, body, [initial_grads, initial_ready_to_update]) + return grads diff --git a/tensorflow/python/training/experimental/loss_scaling_gradient_tape_test.py b/tensorflow/python/training/experimental/loss_scaling_gradient_tape_test.py index b8c85a929da820..36d7d18a93b8d3 100644 --- a/tensorflow/python/training/experimental/loss_scaling_gradient_tape_test.py +++ b/tensorflow/python/training/experimental/loss_scaling_gradient_tape_test.py @@ -20,58 +20,137 @@ from absl.testing import parameterized import numpy as np from tensorflow.python.compat import v2_compat +from tensorflow.python.distribute import distribution_strategy_context +from tensorflow.python.distribute import mirrored_strategy +from tensorflow.python.distribute import values +from tensorflow.python.eager import context from tensorflow.python.eager import def_function from tensorflow.python.framework import constant_op +from tensorflow.python.framework import test_combinations from tensorflow.python.platform import test from tensorflow.python.training.experimental import loss_scale as loss_scale_module from tensorflow.python.training.experimental import loss_scaling_gradient_tape as lsgt +from tensorflow.python.util import nest + + +# If called outside any strategy.scope() calls, this will return the default +# strategy. 
+default_strategy_fn = distribution_strategy_context.get_strategy + + +def create_mirrored_strategy(): + if context.num_gpus() >= 1: + return mirrored_strategy.MirroredStrategy(['cpu:0', 'gpu:0']) + else: + return mirrored_strategy.MirroredStrategy(['cpu:0']) class LossScaleGradientTapeTest(test.TestCase, parameterized.TestCase): - @parameterized.parameters(loss_scale_module.FixedLossScale, - loss_scale_module.DynamicLossScale) - def test_basic_tapes_eager_mode(self, loss_scale): - x = constant_op.constant(3.0) - with lsgt.LossScaleGradientTape(loss_scale(32)) as g: - g.watch(x) - y = x * x - dy_dx = g.gradient(y, x) - self.assertEqual(self.evaluate(dy_dx), 6.0) + def _run_with_strategy(self, run_fn, strategy, use_tf_function=False): + """Runs `run_fn` under the DistributionStrategy `strategy`. - @parameterized.parameters(loss_scale_module.FixedLossScale, - loss_scale_module.DynamicLossScale) - def test_basic_tapes_graph_mode(self, loss_scale): - loss_scale = loss_scale(32) + Runs `run_fn` with `strategy.experimental_run_v2`. Returns a list of the + return values of `run_fn`, one per replica. - @def_function.function - def _inner_test(): + Args: + run_fn: The function to run. + strategy: The DistributionStrategy to run `run_fn` with. + use_tf_function: If True, call `run_fn` under a tf.function. + + Returns: + A list of tensors, each being the return value of `run_fn` from one + replica. If a nested structure is returned from `run_fn`, returns a + nested structure, where each element is a list of tensors. 
+ """ + strategy_fn = lambda: strategy.experimental_run_v2(run_fn) + if use_tf_function: + strategy_fn = def_function.function(strategy_fn) + + results = strategy_fn() + + def convert_tensor_to_list(tensor): + if isinstance(tensor, values.DistributedValues): + return tensor.values + else: + return [tensor] + return nest.map_structure(convert_tensor_to_list, results) + + @test_combinations.generate(test_combinations.combine( + loss_scale=[loss_scale_module.FixedLossScale, + loss_scale_module.DynamicLossScale], + strategy_fn=[default_strategy_fn, create_mirrored_strategy], + use_tf_function=[True, False] + )) + def test_basic_tapes(self, loss_scale, strategy_fn, use_tf_function): + loss_scale = loss_scale(32) + def run_fn(): x = constant_op.constant(3.0) with lsgt.LossScaleGradientTape(loss_scale) as g: g.watch(x) y = x * x return g.gradient(y, x) - self.assertEqual(self.evaluate(_inner_test()), 6.0) + dy_dx_list = self._run_with_strategy(run_fn, strategy_fn(), use_tf_function) + self.assertEqual(loss_scale(), 32) + for dy_dx in dy_dx_list: + self.assertEqual(dy_dx, 6.0) - @parameterized.parameters(loss_scale_module.FixedLossScale, - loss_scale_module.DynamicLossScale) - def test_nested_tapes(self, loss_scale): - x = constant_op.constant(3.0) - with lsgt.LossScaleGradientTape(loss_scale(32)) as g: - g.watch(x) - with lsgt.LossScaleGradientTape(loss_scale(32)) as gg: - gg.watch(x) + @test_combinations.generate(test_combinations.combine( + loss_scale=[loss_scale_module.FixedLossScale, + loss_scale_module.DynamicLossScale], + strategy_fn=[default_strategy_fn, create_mirrored_strategy], + use_tf_function=[True, False] + )) + def test_output_gradients(self, loss_scale, strategy_fn, use_tf_function): + loss_scale = loss_scale(32) + def run_fn(): + x = constant_op.constant(3.0) + with lsgt.LossScaleGradientTape(loss_scale) as g: + g.watch(x) y = x * x - dy_dx = gg.gradient(y, x) - self.assertEqual(self.evaluate(dy_dx), 6.0) - d2y_dx2 = g.gradient(dy_dx, x) - 
self.assertEqual(self.evaluate(d2y_dx2), 2.0) - - @parameterized.parameters(loss_scale_module.FixedLossScale, - loss_scale_module.DynamicLossScale) - def test_non_persistent_tapes_error(self, loss_scale): + return g.gradient(y, x, output_gradients=constant_op.constant(2.0)) + dy_dx_list = self._run_with_strategy(run_fn, strategy_fn(), use_tf_function) + self.assertEqual(loss_scale(), 32) + for dy_dx in dy_dx_list: + self.assertEqual(dy_dx, 12.0) + + @test_combinations.generate(test_combinations.combine( + loss_scale=[loss_scale_module.FixedLossScale, + loss_scale_module.DynamicLossScale], + strategy_fn=[default_strategy_fn], + use_tf_function=[True, False] + )) + def test_nested_tapes(self, loss_scale, strategy_fn, use_tf_function): + # TODO(reedwm): Support nested tapes with mirrored strategy. Currently this + # does not work, as the set of active gradient tapes is a thread-local + # variable. Mirrored strategy spawns new threads, making the outer gradient + # tape non-active when using the inner gradient tape. 
+ outer_loss_scale = loss_scale(32) + inner_loss_scale = loss_scale(32) + def run_fn(): + x = constant_op.constant(3.0) + with lsgt.LossScaleGradientTape(outer_loss_scale) as g: + g.watch(x) + with lsgt.LossScaleGradientTape(inner_loss_scale) as gg: + gg.watch(x) + y = x * x + dy_dx = gg.gradient(y, x) + d2y_dx2 = g.gradient(dy_dx, x) + return dy_dx, d2y_dx2 + + dy_dx_list, d2y_dx2_list = self._run_with_strategy(run_fn, strategy_fn(), + use_tf_function) + self.assertEqual(outer_loss_scale(), 32) + self.assertEqual(inner_loss_scale(), 32) + for dy_dx in dy_dx_list: + self.assertEqual(dy_dx, 6.0) + for d2y_dx2 in d2y_dx2_list: + self.assertEqual(d2y_dx2, 2.0) + + def test_non_persistent_tapes_error(self): x = constant_op.constant(3.0) - with lsgt.LossScaleGradientTape(loss_scale(32), persistent=False) as g: + with lsgt.LossScaleGradientTape(loss_scale_module.FixedLossScale(32), + persistent=False) as g: g.watch(x) y = x * x z = y * y @@ -79,21 +158,36 @@ def test_non_persistent_tapes_error(self, loss_scale): with self.assertRaisesRegexp(RuntimeError, 'persistent'): g.gradient(y, x) - @parameterized.parameters(loss_scale_module.FixedLossScale, - loss_scale_module.DynamicLossScale) - def test_persistent_tapes(self, loss_scale): - x = constant_op.constant(3.0) - with lsgt.LossScaleGradientTape(loss_scale(32), persistent=True) as g: - g.watch(x) - y = x * x - z = y * y - dz_dx = g.gradient(z, x) - self.assertEqual(self.evaluate(dz_dx), 108.0) - dy_dx = g.gradient(y, x) - self.assertEqual(self.evaluate(dy_dx), 6.0) + @test_combinations.generate(test_combinations.combine( + loss_scale=[loss_scale_module.FixedLossScale, + loss_scale_module.DynamicLossScale], + strategy_fn=[default_strategy_fn, create_mirrored_strategy], + use_tf_function=[True, False] + )) + def test_persistent_tapes(self, loss_scale, strategy_fn, use_tf_function): - @parameterized.parameters(loss_scale_module.FixedLossScale, - loss_scale_module.DynamicLossScale) + ls = loss_scale(32) + def run_fn(): + x = 
constant_op.constant(3.0) + with lsgt.LossScaleGradientTape(ls, persistent=True) as g: + g.watch(x) + y = x * x + z = y * y + dz_dx = g.gradient(z, x) + dy_dx = g.gradient(y, x) + return dz_dx, dy_dx + + dz_dx_list, dy_dx_list = self._run_with_strategy(run_fn, strategy_fn(), + use_tf_function) + for dz_dx in dz_dx_list: + self.assertEqual(dz_dx, 108.0) + for dy_dx in dy_dx_list: + self.assertEqual(dy_dx, 6.0) + + @test_combinations.generate(test_combinations.combine( + loss_scale=[loss_scale_module.FixedLossScale, + loss_scale_module.DynamicLossScale], + )) def test_nested_sources(self, loss_scale): x = (constant_op.constant(19.0), (constant_op.constant(8.), constant_op.constant(9.))) @@ -103,8 +197,10 @@ def test_nested_sources(self, loss_scale): dy_dx = g.gradient(y, x) self.assertEqual(self.evaluate(dy_dx), (13., (13., 13.))) - @parameterized.parameters(loss_scale_module.FixedLossScale, - loss_scale_module.DynamicLossScale) + @test_combinations.generate(test_combinations.combine( + loss_scale=[loss_scale_module.FixedLossScale, + loss_scale_module.DynamicLossScale], + )) def test_nested_targets(self, loss_scale): w = constant_op.constant(3.0) with lsgt.LossScaleGradientTape(loss_scale(32)) as g: @@ -115,67 +211,130 @@ def test_nested_targets(self, loss_scale): grad = g.gradient([x, (y, z)], w) self.assertEqual(self.evaluate(grad), 23) - @parameterized.parameters(loss_scale_module.FixedLossScale, - loss_scale_module.DynamicLossScale) - def test_scaling_inf_gradient(self, loss_scale): - x = constant_op.constant(1.0) - with lsgt.LossScaleGradientTape(loss_scale(32)) as g: - g.watch(x) - y = x * np.inf - dy_dx = g.gradient(y, x) - self.assertEqual(self.evaluate(dy_dx), np.inf) + @test_combinations.generate(test_combinations.combine( + loss_scale=[loss_scale_module.FixedLossScale, + loss_scale_module.DynamicLossScale], + strategy_fn=[default_strategy_fn, create_mirrored_strategy], + non_finite_term=[np.inf, np.nan], + )) + def test_scaling_non_finite_gradient(self, 
loss_scale, strategy_fn, + non_finite_term): + loss_scale = loss_scale(32) + def run_fn(): + x = constant_op.constant(1.0) + with lsgt.LossScaleGradientTape(loss_scale) as g: + g.watch(x) + y = x * non_finite_term + return g.gradient(y, x) - @parameterized.parameters(loss_scale_module.FixedLossScale, - loss_scale_module.DynamicLossScale) - def test_scaling_nan_gradient(self, loss_scale): - x = constant_op.constant(1.0) - with lsgt.LossScaleGradientTape(loss_scale(32)) as g: - g.watch(x) - y = x * np.nan - dy_dx = g.gradient(y, x) - self.assertTrue(np.isnan(self.evaluate(dy_dx))) + dy_dx_list = self._run_with_strategy(run_fn, strategy_fn()) + check_fn = np.isposinf if non_finite_term == np.inf else np.isnan + for dy_dx in dy_dx_list: + self.assertTrue(check_fn(dy_dx)) - @parameterized.parameters(np.inf, np.nan) - def test_dynamic_scale_to_one_on_non_finite_gradient(self, non_finite_term): + @test_combinations.generate(test_combinations.combine( + strategy_fn=[default_strategy_fn, create_mirrored_strategy], + non_finite_term=[np.inf, np.nan], + use_tf_function=[True, False], + )) + def test_dynamic_scale_to_one_on_non_finite_gradient( + self, strategy_fn, non_finite_term, use_tf_function): loss_scale = loss_scale_module.DynamicLossScale(initial_loss_scale=32) - x = constant_op.constant(1.0) - with lsgt.LossScaleGradientTape(loss_scale) as g: - g.watch(x) - y = x * non_finite_term - g.gradient(y, x) + def run_fn(): + x = constant_op.constant(1.0) + with lsgt.LossScaleGradientTape(loss_scale) as g: + g.watch(x) + y = x * non_finite_term + g.gradient(y, x) + + self._run_with_strategy(run_fn, strategy_fn(), use_tf_function) + self.assertEqual(self.evaluate(loss_scale()), 1.0) + + @test_combinations.generate(test_combinations.combine( + use_tf_function=[True, False], + )) + def test_dynamic_scale_to_one_on_non_finite_gradient_on_last_replica( + self, use_tf_function): + if context.num_gpus() < 1: + # Requires the mirrored strategy to have two replicas: one on the CPU and 
+ # one on the GPU + self.skipTest('Test requires at least 1 GPU') + loss_scale = loss_scale_module.DynamicLossScale(initial_loss_scale=32) + def run_fn(): + x = constant_op.constant(1.0) + with lsgt.LossScaleGradientTape(loss_scale) as g: + g.watch(x) + # The gradient will be finite on the first replica, and infinite on the + # second + rep_ctx = distribution_strategy_context.get_replica_context() + if rep_ctx.replica_id_in_sync_group == rep_ctx.num_replicas_in_sync - 1: + y = x * np.inf + else: + y = x * 2 + return g.gradient(y, x) + + replica0_grad, replica1_grad = self._run_with_strategy( + run_fn, create_mirrored_strategy(), use_tf_function) self.assertEqual(self.evaluate(loss_scale()), 1.0) + self.assertEqual(replica0_grad, 2.0) + self.assertEqual(replica1_grad, np.inf) - @parameterized.parameters([np.inf, np.isposinf], [np.nan, np.isnan]) - def test_fixed_scaling_no_change_non_finite_gradient(self, non_finite_term, - is_non_finite): + @test_combinations.generate(test_combinations.combine( + strategy_fn=[default_strategy_fn, create_mirrored_strategy], + non_finite_term=[np.inf, np.nan], + )) + def test_fixed_scaling_no_change_non_finite_gradient(self, strategy_fn, + non_finite_term): loss_scale = loss_scale_module.FixedLossScale(32) - x = constant_op.constant(1.0) - with lsgt.LossScaleGradientTape(loss_scale) as g: - g.watch(x) - y = x * non_finite_term - dy_dx = g.gradient(y, x) - self.assertTrue(is_non_finite(self.evaluate(dy_dx))) + def run_fn(): + x = constant_op.constant(1.0) + with lsgt.LossScaleGradientTape(loss_scale) as g: + g.watch(x) + y = x * non_finite_term + return g.gradient(y, x) + + dy_dx_list = self._run_with_strategy(run_fn, strategy_fn()) + check_fn = np.isposinf if non_finite_term == np.inf else np.isnan + for dy_dx in dy_dx_list: + self.assertTrue(check_fn(self.evaluate(dy_dx))) self.assertEqual(self.evaluate(loss_scale()), 32.0) - def test_dynamic_loss_scaling_down_loop(self): + @test_combinations.generate(test_combinations.combine( + 
strategy_fn=[default_strategy_fn, create_mirrored_strategy], + use_tf_function=[True, False] + )) + def test_dynamic_loss_scaling_down_loop(self, strategy_fn, use_tf_function): loss_scale = loss_scale_module.DynamicLossScale(initial_loss_scale=32) - x = constant_op.constant(1.0) - with lsgt.LossScaleGradientTape(loss_scale) as g: - g.watch(x) - y = x * (3.0 * (10**37)) # grad will be inf after scaling - dy_dx = g.gradient(y, x) + def run_fn(): + x = constant_op.constant(1.0) + with lsgt.LossScaleGradientTape(loss_scale) as g: + g.watch(x) + y = x * (3.0 * (10**37)) # grad will be inf after scaling + return g.gradient(y, x) + + dy_dx_list = self._run_with_strategy(run_fn, strategy_fn(), use_tf_function) self.assertEqual(self.evaluate(loss_scale()), 8.0) - self.assertAllClose(self.evaluate(dy_dx), (3.0 * (10**37)), atol=1e-06) + for dy_dx in dy_dx_list: + self.assertAllClose(self.evaluate(dy_dx), (3.0 * (10**37)), atol=1e-06) - def test_dynamic_loss_scaling_inf_target_post_scale(self): + @test_combinations.generate(test_combinations.combine( + strategy_fn=[default_strategy_fn, create_mirrored_strategy], + use_tf_function=[True, False] + )) + def test_dynamic_loss_scaling_inf_target_post_scale(self, strategy_fn, + use_tf_function): loss_scale = loss_scale_module.DynamicLossScale(initial_loss_scale=32.0) - x = constant_op.constant(3.0 * (10**37)) - with lsgt.LossScaleGradientTape(loss_scale) as g: - g.watch(x) - y = x * 3.0 # target will be inf after scaling - dy_dx = g.gradient(y, x) - self.assertAllClose(self.evaluate(dy_dx), 3.0) + def run_fn(): + x = constant_op.constant(3.0 * (10**37)) + with lsgt.LossScaleGradientTape(loss_scale) as g: + g.watch(x) + y = x * 3.0 # target will be inf after scaling + return g.gradient(y, x) + + dy_dx_list = self._run_with_strategy(run_fn, strategy_fn(), use_tf_function) self.assertEqual(self.evaluate(loss_scale()), 32.0) + for dy_dx in dy_dx_list: + self.assertAllClose(self.evaluate(dy_dx), 3.0) if __name__ == '__main__': From 
2b5075c07980a6b959ea3be3a35b62e69e85430f Mon Sep 17 00:00:00 2001 From: Taylor Robie Date: Mon, 9 Dec 2019 16:25:35 -0800 Subject: [PATCH 079/130] Merge 9422eb1139b3163cf65950c6e713f39344ec33e4 into the 2.1 release branch --- tensorflow/python/keras/layers/core.py | 150 ++++++++++++++------ tensorflow/python/keras/layers/core_test.py | 91 +++++++++--- 2 files changed, 178 insertions(+), 63 deletions(-) diff --git a/tensorflow/python/keras/layers/core.py b/tensorflow/python/keras/layers/core.py index aad66429b75000..e4999466ba2d3d 100644 --- a/tensorflow/python/keras/layers/core.py +++ b/tensorflow/python/keras/layers/core.py @@ -19,12 +19,15 @@ from __future__ import print_function import copy +import functools import sys +import textwrap import types as python_types import warnings import numpy as np +from tensorflow.python.eager import backprop from tensorflow.python.eager import context from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes @@ -47,6 +50,8 @@ from tensorflow.python.ops import sparse_ops from tensorflow.python.ops import standard_ops from tensorflow.python.ops import variable_scope +from tensorflow.python.platform import tf_logging +from tensorflow.python.training.tracking import base as trackable from tensorflow.python.util import nest from tensorflow.python.util import tf_inspect from tensorflow.python.util.tf_export import keras_export @@ -690,7 +695,7 @@ class Lambda(Layer): can be used when constructing `Sequential` and Functional API models. `Lambda` layers are best suited for simple operations or quick experimentation. For more advanced usecases, follow - [this guide](https://www.tensorflow.org/alpha/guide/keras/custom_layers_and_models) + [this guide](https://www.tensorflow.org/guide/keras/custom_layers_and_models) for subclassing `tf.keras.layers.Layer`. 
The main reason to subclass `tf.keras.layers.Layer` instead of using a @@ -721,30 +726,34 @@ def antirectifier(x): model.add(Lambda(antirectifier)) ``` - Variables can be created within a `Lambda` layer. Like with - other layers, these variables will be created only once and reused - if the `Lambda` layer is called on new inputs. If creating more - than one variable in a given `Lambda` instance, be sure to use - a different name for each variable. Note that calling sublayers - from within a `Lambda` is not supported. + Variables: + While it is possible to use Variables with Lambda layers, this practice is + discouraged as it can easily lead to bugs. For instance, consider the + following layer: - Example of variable creation: + ```python + scale = tf.Variable(1.) + scale_layer = tf.keras.layers.Lambda(lambda x: x * scale) + ``` - ```python - def linear_transform(x): - v1 = tf.Variable(1., name='multiplier') - v2 = tf.Variable(0., name='bias') - return x*v1 + v2 - - linear_layer = Lambda(linear_transform) - model.add(linear_layer) - model.add(keras.layers.Dense(10, activation='relu')) - model.add(linear_layer) # Reuses existing Variables - ``` + Because scale_layer does not directly track the `scale` variable, it will + not appear in `scale_layer.trainable_weights` and will therefore not be + trained if `scale_layer` is used in a Model. + + A better pattern is to write a subclassed Layer: + + ```python + class ScaleLayer(tf.keras.layers.Layer): + def __init__(self): + super(ScaleLayer, self).__init__() + self.scale = tf.Variable(1.) + + def call(self, inputs): + return inputs * self.scale + ``` - Note that creating two instances of `Lambda` using the same function - will *not* share Variables between the two instances. Each instance of - `Lambda` will create and manage its own weights. + In general, Lambda layers can be convenient for simple stateless + computation, but anything more complex should use a subclass Layer instead. 
Arguments: function: The function to be evaluated. Takes input tensor as first @@ -769,22 +778,25 @@ def linear_transform(x): Output shape: Specified by `output_shape` argument """ + @trackable.no_automatic_dependency_tracking def __init__(self, function, output_shape=None, mask=None, arguments=None, **kwargs): super(Lambda, self).__init__(**kwargs) + + self.arguments = arguments or {} self.function = function - self.arguments = arguments if arguments else {} + self._function_with_args = functools.partial(function, **self.arguments) + if mask is not None: self.supports_masking = True self.mask = mask self._supports_ragged_inputs = True self._output_shape = output_shape - self._variable_dict = {} - # These attributes are inherited from `Layer`. - self._trainable_weights = [] - self._non_trainable_weights = [] - function_args = tf_inspect.getfullargspec(self.function).args + # Warning on every invocation will be quite irksome in Eager mode. + self._already_warned = False + + function_args = tf_inspect.getfullargspec(function).args self._fn_expects_training_arg = 'training' in function_args self._fn_expects_mask_arg = 'mask' in function_args @@ -818,26 +830,72 @@ def _add_batch(shape): return nest.map_structure(_add_batch, output_shapes) def call(self, inputs, mask=None, training=None): - arguments = self.arguments + kwargs = {} if self._fn_expects_mask_arg: - arguments['mask'] = mask + kwargs['mask'] = mask if self._fn_expects_training_arg: - arguments['training'] = training - with variable_scope.variable_creator_scope(self._variable_creator): - return self.function(inputs, **arguments) - - def _variable_creator(self, next_creator, **kwargs): - name = kwargs['name'] - if name in self._variable_dict: - return self._variable_dict[name] - var = next_creator(**kwargs) - self._variable_dict[name] = var - if var.trainable: - self._trainable_weights.append(var) - else: - self._non_trainable_weights.append(var) - K.track_variable(var) - return var + kwargs['training'] = 
training + + call_fn = self._function_with_args + if kwargs: + call_fn = functools.partial(call_fn, **kwargs) + + created_variables = [] + def _variable_creator(next_creator, **kwargs): + var = next_creator(**kwargs) + created_variables.append(var) + return var + + with backprop.GradientTape(watch_accessed_variables=True) as tape,\ + variable_scope.variable_creator_scope(_variable_creator): + result = call_fn(inputs) + self._check_variables(created_variables, tape.watched_variables()) + return result + + def _check_variables(self, created_variables, accessed_variables): + if not created_variables and not accessed_variables: + # In the common case that a Lambda layer does not touch a Variable, we + # don't want to incur the runtime cost of assembling any state used for + # checking only to immediately discard it. + return + + tracked_weights = set(v.experimental_ref() for v in self.weights) + untracked_new_vars = [v for v in created_variables + if v.experimental_ref() not in tracked_weights] + if untracked_new_vars: + variable_str = '\n'.join([' {}'.format(i) for i in untracked_new_vars]) + error_str = textwrap.dedent( + ''' + The following Variables were created within a Lambda layer ({name}) + but are not tracked by said layer: + {variable_str} + The layer cannot safely ensure proper Variable reuse across multiple + calls, and consquently this behavior is disallowed for safety. 
Lambda + layers are not well suited to stateful computation; instead, writing a + subclassed Layer is the recommend way to define layers with + Variables.''' + ).format(name=self.name, variable_str=variable_str) + raise ValueError(error_str) + + untracked_used_vars = [v for v in accessed_variables + if v.experimental_ref() not in tracked_weights] + if untracked_used_vars and not self._already_warned: + variable_str = '\n'.join([' {}'.format(i) for i in untracked_used_vars]) + self._warn(textwrap.dedent( + ''' + The following Variables were used a Lambda layer's call ({name}), but + are not present in its tracked objects: + {variable_str} + It is possible that this is intended behavior, but it is more likely + an omission. This is a strong indication that this layer should be + formulated as a subclassed Layer rather than a Lambda layer.''' + ).format(name=self.name, variable_str=variable_str)) + self._already_warned = True + + def _warn(self, msg): + # This method will be overridden in a unit test to raise an error, because + # self.assertWarns is not universally implemented. 
+ return tf_logging.warn(msg) def compute_mask(self, inputs, mask=None): if callable(self.mask): diff --git a/tensorflow/python/keras/layers/core_test.py b/tensorflow/python/keras/layers/core_test.py index aa7b42d0e950aa..8e6ad99873cff2 100644 --- a/tensorflow/python/keras/layers/core_test.py +++ b/tensorflow/python/keras/layers/core_test.py @@ -18,6 +18,8 @@ from __future__ import division from __future__ import print_function +import textwrap + import numpy as np from tensorflow.python import keras @@ -225,17 +227,6 @@ def test_lambda_config_serialization(self): self.assertAllEqual(layer._output_shape, (1, 1)) self.assertAllEqual(layer.mask(1, True), True) - def test_lambda_with_variable(self): - - def fn(x): - return x * variables.Variable(2., name='multiplier') - - layer = keras.layers.Lambda(fn) - for _ in range(10): - layer(np.ones((10, 10), 'float32')) - self.assertLen(layer.trainable_weights, 1) - self.assertEqual(layer.trainable_weights[0].name, 'lambda/multiplier:0') - def test_lambda_with_training_arg(self): def fn(x, training=True): @@ -283,19 +274,25 @@ def add_one(inputs): expected_out = ragged_factory_ops.constant([[2.0], [3.0, 4.0]]) self.assertAllClose(out, expected_out) + class TestStatefulLambda(keras_parameterized.TestCase): @keras_parameterized.run_all_keras_modes @keras_parameterized.run_with_all_model_types def test_lambda_with_variable_in_model(self): - - def lambda_fn(x): - # Variable will only get created once. - v = variables.Variable(1., trainable=True) + v = variables.Variable(1., trainable=True) + def lambda_fn(x, v): return x * v - model = testing_utils.get_model_from_layers( - [keras.layers.Lambda(lambda_fn)], input_shape=(10,)) + # While it is generally not advised to mix Variables with Lambda layers, if + # the variables are explicitly set as attributes then they are still + # tracked. This is consistent with the base Layer behavior. 
+ layer = keras.layers.Lambda(lambda_fn, arguments={'v': v}) + self.assertLen(layer.trainable_weights, 0) + layer.v = v + self.assertLen(layer.trainable_weights, 1) + + model = testing_utils.get_model_from_layers([layer], input_shape=(10,)) model.compile( keras.optimizer_v2.gradient_descent.SGD(0.1), 'mae', @@ -306,6 +303,66 @@ def lambda_fn(x): self.assertLen(model.trainable_weights, 1) self.assertAllClose(keras.backend.get_value(model.trainable_weights[0]), 2.) + @keras_parameterized.run_all_keras_modes + @keras_parameterized.run_with_all_model_types + def test_creation_inside_lambda(self): + def lambda_fn(x): + scale = variables.Variable(1., trainable=True, name='scale') + shift = variables.Variable(1., trainable=True, name='shift') + return x * scale + shift + + expected_error = textwrap.dedent(r''' + ( )?The following Variables were created within a Lambda layer \(shift_and_scale\) + ( )?but are not tracked by said layer: + ( )? Date: Tue, 10 Dec 2019 08:09:36 -0800 Subject: [PATCH 080/130] include fix for test breakages by replacing functools.partial with a simpler kwargs dict. 
--- tensorflow/python/keras/layers/core.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/tensorflow/python/keras/layers/core.py b/tensorflow/python/keras/layers/core.py index e4999466ba2d3d..3ebf94490d8e07 100644 --- a/tensorflow/python/keras/layers/core.py +++ b/tensorflow/python/keras/layers/core.py @@ -19,7 +19,6 @@ from __future__ import print_function import copy -import functools import sys import textwrap import types as python_types @@ -785,7 +784,6 @@ def __init__(self, function, output_shape=None, mask=None, arguments=None, self.arguments = arguments or {} self.function = function - self._function_with_args = functools.partial(function, **self.arguments) if mask is not None: self.supports_masking = True @@ -830,16 +828,13 @@ def _add_batch(shape): return nest.map_structure(_add_batch, output_shapes) def call(self, inputs, mask=None, training=None): - kwargs = {} + # We must copy for thread safety, but it only needs to be a shallow copy. + kwargs = {k: v for k, v in self.arguments.items()} if self._fn_expects_mask_arg: kwargs['mask'] = mask if self._fn_expects_training_arg: kwargs['training'] = training - call_fn = self._function_with_args - if kwargs: - call_fn = functools.partial(call_fn, **kwargs) - created_variables = [] def _variable_creator(next_creator, **kwargs): var = next_creator(**kwargs) @@ -848,7 +843,7 @@ def _variable_creator(next_creator, **kwargs): with backprop.GradientTape(watch_accessed_variables=True) as tape,\ variable_scope.variable_creator_scope(_variable_creator): - result = call_fn(inputs) + result = self.function(inputs, **kwargs) self._check_variables(created_variables, tape.watched_variables()) return result From 01464cf8b4b32fbd6c761fe4f4d76a556207fa93 Mon Sep 17 00:00:00 2001 From: Srinivas Vasudevan Date: Thu, 21 Nov 2019 13:11:40 -0800 Subject: [PATCH 081/130] Expose ndtri and erfinv under tf.math.ndtri and tf.math.erfinv. 
PiperOrigin-RevId: 281816005 Change-Id: Idded0bb39c0d32288f1bfa3d0288ba5847aa6fc1 --- .../api_def/python_api/api_def_Erfinv.pbtxt | 4 +++ .../api_def/python_api/api_def_Ndtri.pbtxt | 4 +++ tensorflow/python/ops/math_ops.py | 34 +++++++++++++++++++ .../tools/api/golden/v1/tensorflow.math.pbtxt | 8 +++++ .../tools/api/golden/v1/tensorflow.pbtxt | 8 ----- .../tools/api/golden/v2/tensorflow.math.pbtxt | 8 +++++ .../tools/api/golden/v2/tensorflow.pbtxt | 8 ----- 7 files changed, 58 insertions(+), 16 deletions(-) create mode 100644 tensorflow/core/api_def/python_api/api_def_Erfinv.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_Ndtri.pbtxt diff --git a/tensorflow/core/api_def/python_api/api_def_Erfinv.pbtxt b/tensorflow/core/api_def/python_api/api_def_Erfinv.pbtxt new file mode 100644 index 00000000000000..fae017dde2edca --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_Erfinv.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "Erfinv" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_Ndtri.pbtxt b/tensorflow/core/api_def/python_api/api_def_Ndtri.pbtxt new file mode 100644 index 00000000000000..7e6e68ed45da74 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_Ndtri.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "Ndtri" + visibility: HIDDEN +} diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py index 0836e9f30c79d8..6a941015e621c9 100644 --- a/tensorflow/python/ops/math_ops.py +++ b/tensorflow/python/ops/math_ops.py @@ -109,6 +109,7 @@ tf_export(v1=["arg_max"])(arg_max) tf_export(v1=["arg_min"])(arg_min) + # This is set by resource_variable_ops.py. 
It is included in this way since # there is a circular dependency between math_ops and resource_variable_ops _resource_variable_type = None @@ -4187,3 +4188,36 @@ def reciprocal_no_nan(x, name=None): x = ops.convert_to_tensor(x, name="x") one = constant_op.constant(1, dtype=x.dtype.base_dtype, name="one") return gen_math_ops.div_no_nan(one, x, name=scope) + + +@tf_export("math.erfinv") +@dispatch.add_dispatch_support +def erfinv(x, name=None): + """Compute inverse error function. + + Given `x`, compute the inverse error function of `x`. This function + is the inverse of `tf.math.erf`. + + Args: + x: `Tensor` with type `float` or `double`. + name: A name for the operation (optional). + Returns: + Inverse error function of `x`. + """ + with ops.name_scope(name, "erfinv", [x]): + return gen_math_ops.erfinv(x) + + +@tf_export("math.ndtri") +@dispatch.add_dispatch_support +def ndtri(x, name=None): + """Compute quantile of Standard Normal. + + Args: + x: `Tensor` with type `float` or `double`. + name: A name for the operation (optional). + Returns: + The quantile (inverse CDF) of the standard normal evaluated at `x`. 
+ """ + with ops.name_scope(name, "ndtri", [x]): + return gen_math_ops.ndtri(x) diff --git a/tensorflow/tools/api/golden/v1/tensorflow.math.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.math.pbtxt index bf7812a668d830..c904681f633ba1 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.math.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.math.pbtxt @@ -140,6 +140,10 @@ tf_module { name: "erfc" argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "erfinv" + argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " + } member_method { name: "exp" argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " @@ -280,6 +284,10 @@ tf_module { name: "multiply_no_nan" argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "ndtri" + argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " + } member_method { name: "negative" argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " diff --git a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt index 6c75ecb5fbf4f0..5b9747c1cef0f4 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt @@ -1236,10 +1236,6 @@ tf_module { name: "erfc" argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " } - member_method { - name: "erfinv" - argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " - } member_method { name: "executing_eagerly" argspec: "args=[], varargs=None, keywords=None, defaults=None" @@ -1716,10 +1712,6 @@ tf_module { name: "multiply" argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " } - member_method { - name: "ndtri" - argspec: "args=[\'x\', \'name\'], varargs=None, 
keywords=None, defaults=[\'None\'], " - } member_method { name: "negative" argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " diff --git a/tensorflow/tools/api/golden/v2/tensorflow.math.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.math.pbtxt index 82688f51640b38..2ec2ab27476270 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.math.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.math.pbtxt @@ -140,6 +140,10 @@ tf_module { name: "erfc" argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "erfinv" + argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " + } member_method { name: "exp" argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " @@ -280,6 +284,10 @@ tf_module { name: "multiply_no_nan" argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "ndtri" + argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " + } member_method { name: "negative" argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " diff --git a/tensorflow/tools/api/golden/v2/tensorflow.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.pbtxt index d67870a92b8c17..7cf14d69e49f7a 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.pbtxt @@ -624,10 +624,6 @@ tf_module { name: "equal" argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " } - member_method { - name: "erfinv" - argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " - } member_method { name: "executing_eagerly" argspec: "args=[], varargs=None, keywords=None, defaults=None" @@ -812,10 +808,6 @@ tf_module { name: "multiply" argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " } - 
member_method { - name: "ndtri" - argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " - } member_method { name: "negative" argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " From 659d0419bf17bd36e01c7d19f4f2982ff7c388a4 Mon Sep 17 00:00:00 2001 From: Duncan Riach Date: Tue, 10 Dec 2019 13:52:30 -0800 Subject: [PATCH 082/130] Fix small issues in the version 2.1 release note --- RELEASE.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/RELEASE.md b/RELEASE.md index 281f755c5c07f5..ed516eb59e6917 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -20,7 +20,7 @@ TensorFlow 2.1 will be the last TF release supporting Python 2. Python 2 support * Changes rebatching for `tf.data datasets` + distribution strategies for better performance. Note that the dataset also behaves slightly differently, in that the rebatched dataset cardinality will always be a multiple of the number of replicas. * `TensorRT` * [TensorRT 6.0](https://developer.nvidia.com/tensorrt#tensorrt-whats-new) is now supported and enabled by default. This adds support for more TensorFlow ops including Conv3D, Conv3DBackpropInputV2, AvgPool3D, MaxPool3D, ResizeBilinear, and ResizeNearestNeighbor. In addition, the TensorFlow-TensorRT python conversion API is exported as `tf.experimental.tensorrt.Converter`. - * Environment variable `TF_DETERMINISTIC_OPS` added. When set to "true" or "1", this environment variable makes `tf.nn.bias_add` operate deterministically (i.e. reproducibly) when XLA JIT compilation is *not* enabled. It also makes cuDNN convolution and max-pooling operate deterministically. This makes Keras Conv*D and MaxPool*D layers operate deterministically in both the forward and backward directions when running on a CUDA-enabled GPU. +* Environment variable `TF_DETERMINISTIC_OPS` has been added. When set to "true" or "1", this environment variable makes `tf.nn.bias_add` operate deterministically (i.e. 
reproducibly), but currently only when XLA JIT compilation is *not* enabled. `TF_DETERMINISTIC_OPS` also makes cuDNN convolution and max-pooling operate deterministically. This makes Keras Conv\*D and MaxPool\*D layers operate deterministically in both the forward and backward directions when running on a CUDA-enabled GPU. ## Known issues Because of [issues with building on windows](https://github.com/tensorflow/tensorflow/issues/10521), we turned off eigen strong inlining for the Windows builds. Windows binaries are expected to be slightly slower until the build issues are resolved. From 6bb0194c2388bead93eeecf2f965f4e962297e78 Mon Sep 17 00:00:00 2001 From: Duncan Riach Date: Tue, 10 Dec 2019 14:28:31 -0800 Subject: [PATCH 083/130] Small enhancement to text in version 2.1 release notes --- RELEASE.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/RELEASE.md b/RELEASE.md index ed516eb59e6917..206c4be8b79d6e 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -20,7 +20,7 @@ TensorFlow 2.1 will be the last TF release supporting Python 2. Python 2 support * Changes rebatching for `tf.data datasets` + distribution strategies for better performance. Note that the dataset also behaves slightly differently, in that the rebatched dataset cardinality will always be a multiple of the number of replicas. * `TensorRT` * [TensorRT 6.0](https://developer.nvidia.com/tensorrt#tensorrt-whats-new) is now supported and enabled by default. This adds support for more TensorFlow ops including Conv3D, Conv3DBackpropInputV2, AvgPool3D, MaxPool3D, ResizeBilinear, and ResizeNearestNeighbor. In addition, the TensorFlow-TensorRT python conversion API is exported as `tf.experimental.tensorrt.Converter`. -* Environment variable `TF_DETERMINISTIC_OPS` has been added. When set to "true" or "1", this environment variable makes `tf.nn.bias_add` operate deterministically (i.e. reproducibly), but currently only when XLA JIT compilation is *not* enabled. 
`TF_DETERMINISTIC_OPS` also makes cuDNN convolution and max-pooling operate deterministically. This makes Keras Conv\*D and MaxPool\*D layers operate deterministically in both the forward and backward directions when running on a CUDA-enabled GPU. +* Environment variable `TF_DETERMINISTIC_OPS` has been added. When set to "true" or "1", this environment variable makes `tf.nn.bias_add` operate deterministically (i.e. reproducibly), but currently only when XLA JIT compilation is *not* enabled. Setting `TF_DETERMINISTIC_OPS` to "true" or "1" also makes cuDNN convolution and max-pooling operate deterministically. This makes Keras Conv\*D and MaxPool\*D layers operate deterministically in both the forward and backward directions when running on a CUDA-enabled GPU. ## Known issues Because of [issues with building on windows](https://github.com/tensorflow/tensorflow/issues/10521), we turned off eigen strong inlining for the Windows builds. Windows binaries are expected to be slightly slower until the build issues are resolved. From 688c9d1d9434045f2b89fe19fd2e51a41e5560fa Mon Sep 17 00:00:00 2001 From: Reed Wanderman-Milne Date: Wed, 11 Dec 2019 14:15:13 -0800 Subject: [PATCH 084/130] Unexpose LossScaleGradientTape. It doesn't support DistributionStrategy. It will be reexposed when it does. I tried to fix this in #34974, but only made the issue worse. The issue is that when taking gradients with respect to variables (which occurs almost every time), it would crash with a very long error message when DistributionStrategy is used. The unit tests only tested taking gradients w.r.t. constants, as it was assumed there would be no functional difference between taking gradients w.r.t. variables and constants. 
PiperOrigin-RevId: 285059221 Change-Id: I9ffc5d68f092f9ff3ea634b9523b67ff2bbc4bd7 --- .../loss_scaling_gradient_tape.py | 3 +- ...perimental.-loss-scale-gradient-tape.pbtxt | 38 ------------------- ...sorflow.mixed_precision.experimental.pbtxt | 4 -- 3 files changed, 1 insertion(+), 44 deletions(-) delete mode 100644 tensorflow/tools/api/golden/v2/tensorflow.mixed_precision.experimental.-loss-scale-gradient-tape.pbtxt diff --git a/tensorflow/python/training/experimental/loss_scaling_gradient_tape.py b/tensorflow/python/training/experimental/loss_scaling_gradient_tape.py index caae7052b84a70..0afe4c78cafc3f 100644 --- a/tensorflow/python/training/experimental/loss_scaling_gradient_tape.py +++ b/tensorflow/python/training/experimental/loss_scaling_gradient_tape.py @@ -25,10 +25,9 @@ from tensorflow.python.ops.unconnected_gradients import UnconnectedGradients from tensorflow.python.training.experimental import loss_scale as loss_scale_module from tensorflow.python.util import nest -from tensorflow.python.util.tf_export import tf_export -@tf_export("mixed_precision.experimental.LossScaleGradientTape", v1=[]) +# TODO(reedwm): Expose this. Currently it doesn't work with DistributionStrategy class LossScaleGradientTape(backprop.GradientTape): """A gradient tape that scales losses and unscales resulting gradients. 
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.mixed_precision.experimental.-loss-scale-gradient-tape.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.mixed_precision.experimental.-loss-scale-gradient-tape.pbtxt deleted file mode 100644 index 7f4715832e20ca..00000000000000 --- a/tensorflow/tools/api/golden/v2/tensorflow.mixed_precision.experimental.-loss-scale-gradient-tape.pbtxt +++ /dev/null @@ -1,38 +0,0 @@ -path: "tensorflow.mixed_precision.experimental.LossScaleGradientTape" -tf_class { - is_instance: "" - is_instance: "" - is_instance: "" - member_method { - name: "__init__" - argspec: "args=[\'self\', \'loss_scale\', \'persistent\', \'watch_accessed_variables\'], varargs=None, keywords=None, defaults=[\'False\', \'True\'], " - } - member_method { - name: "batch_jacobian" - argspec: "args=[\'self\', \'target\', \'source\', \'unconnected_gradients\', \'parallel_iterations\', \'experimental_use_pfor\'], varargs=None, keywords=None, defaults=[\'UnconnectedGradients.NONE\', \'None\', \'True\'], " - } - member_method { - name: "gradient" - argspec: "args=[\'self\', \'target\', \'sources\', \'output_gradients\', \'unconnected_gradients\'], varargs=None, keywords=None, defaults=[\'None\', \'UnconnectedGradients.NONE\'], " - } - member_method { - name: "jacobian" - argspec: "args=[\'self\', \'target\', \'sources\', \'unconnected_gradients\', \'parallel_iterations\', \'experimental_use_pfor\'], varargs=None, keywords=None, defaults=[\'UnconnectedGradients.NONE\', \'None\', \'True\'], " - } - member_method { - name: "reset" - argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "stop_recording" - argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "watch" - argspec: "args=[\'self\', \'tensor\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "watched_variables" - argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" - } 
-} diff --git a/tensorflow/tools/api/golden/v2/tensorflow.mixed_precision.experimental.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.mixed_precision.experimental.pbtxt index 5abfdcd109d210..61700226fbbfa5 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.mixed_precision.experimental.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.mixed_precision.experimental.pbtxt @@ -12,8 +12,4 @@ tf_module { name: "LossScale" mtype: "" } - member { - name: "LossScaleGradientTape" - mtype: "" - } } From 7b7db9e5adff61cc769816aaeacfe03183f4dc32 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franc=CC=A7ois=20Chollet?= Date: Thu, 12 Dec 2019 14:14:43 -0800 Subject: [PATCH 085/130] Unify V1/2 layer naming in internal imports. --- tensorflow/python/keras/layers/__init__.py | 65 +++++++++++++++---- .../keras/layers/cudnn_recurrent_test.py | 2 +- .../keras/layers/rnn_cell_wrapper_v2_test.py | 2 +- .../python/keras/saving/hdf5_format_test.py | 4 +- tensorflow/python/layers/normalization.py | 6 +- 5 files changed, 58 insertions(+), 21 deletions(-) diff --git a/tensorflow/python/keras/layers/__init__.py b/tensorflow/python/keras/layers/__init__.py index 87dfa34f932563..07cb1bdf1b3164 100644 --- a/tensorflow/python/keras/layers/__init__.py +++ b/tensorflow/python/keras/layers/__init__.py @@ -18,8 +18,11 @@ from __future__ import division from __future__ import print_function +from tensorflow.python import tf2 + # Generic layers. # pylint: disable=g-bad-import-order +# pylint: disable=g-import-not-at-top from tensorflow.python.keras.engine.input_layer import Input from tensorflow.python.keras.engine.input_layer import InputLayer from tensorflow.python.keras.engine.input_spec import InputSpec @@ -27,10 +30,20 @@ from tensorflow.python.keras.engine.base_preprocessing_layer import PreprocessingLayer # Preprocessing layers. 
-from tensorflow.python.keras.layers.preprocessing.normalization import Normalization -from tensorflow.python.keras.layers.preprocessing.normalization_v1 import Normalization as NormalizationV1 -from tensorflow.python.keras.layers.preprocessing.text_vectorization import TextVectorization -from tensorflow.python.keras.layers.preprocessing.text_vectorization_v1 import TextVectorization as TextVectorizationV1 +if tf2.enabled(): + from tensorflow.python.keras.layers.preprocessing.normalization import Normalization + from tensorflow.python.keras.layers.preprocessing.normalization_v1 import Normalization as NormalizationV1 + NormalizationV2 = Normalization + from tensorflow.python.keras.layers.preprocessing.text_vectorization import TextVectorization + from tensorflow.python.keras.layers.preprocessing.text_vectorization_v1 import TextVectorization as TextVectorizationV1 + TextVectorizationV2 = TextVectorization +else: + from tensorflow.python.keras.layers.preprocessing.normalization_v1 import Normalization + from tensorflow.python.keras.layers.preprocessing.normalization import Normalization as NormalizationV2 + NormalizationV1 = Normalization + from tensorflow.python.keras.layers.preprocessing.text_vectorization_v1 import TextVectorization + from tensorflow.python.keras.layers.preprocessing.text_vectorization import TextVectorization as TextVectorizationV2 + TextVectorizationV1 = TextVectorization # Advanced activations. from tensorflow.python.keras.layers.advanced_activations import LeakyReLU @@ -121,8 +134,14 @@ # Normalization layers. 
from tensorflow.python.keras.layers.normalization import LayerNormalization -from tensorflow.python.keras.layers.normalization import BatchNormalization -from tensorflow.python.keras.layers.normalization_v2 import BatchNormalization as BatchNormalizationV2 +if tf2.enabled(): + from tensorflow.python.keras.layers.normalization_v2 import BatchNormalization + from tensorflow.python.keras.layers.normalization import BatchNormalization as BatchNormalizationV1 + BatchNormalizationV2 = BatchNormalization +else: + from tensorflow.python.keras.layers.normalization import BatchNormalization + from tensorflow.python.keras.layers.normalization_v2 import BatchNormalization as BatchNormalizationV2 + BatchNormalizationV1 = BatchNormalization # Kernelized layers. from tensorflow.python.keras.layers.kernelized import RandomFourierFeatures @@ -163,14 +182,32 @@ from tensorflow.python.keras.layers.recurrent import PeepholeLSTMCell from tensorflow.python.keras.layers.recurrent import SimpleRNN -from tensorflow.python.keras.layers.recurrent import GRU -from tensorflow.python.keras.layers.recurrent import GRUCell -from tensorflow.python.keras.layers.recurrent import LSTM -from tensorflow.python.keras.layers.recurrent import LSTMCell -from tensorflow.python.keras.layers.recurrent_v2 import GRU as GRU_v2 -from tensorflow.python.keras.layers.recurrent_v2 import GRUCell as GRUCell_v2 -from tensorflow.python.keras.layers.recurrent_v2 import LSTM as LSTM_v2 -from tensorflow.python.keras.layers.recurrent_v2 import LSTMCell as LSTMCell_v2 +if tf2.enabled(): + from tensorflow.python.keras.layers.recurrent_v2 import GRU + from tensorflow.python.keras.layers.recurrent_v2 import GRUCell + from tensorflow.python.keras.layers.recurrent_v2 import LSTM + from tensorflow.python.keras.layers.recurrent_v2 import LSTMCell + from tensorflow.python.keras.layers.recurrent import GRU as GRUV1 + from tensorflow.python.keras.layers.recurrent import GRUCell as GRUCellV1 + from 
tensorflow.python.keras.layers.recurrent import LSTM as LSTMV1 + from tensorflow.python.keras.layers.recurrent import LSTMCell as LSTMCellV1 + GRUV2 = GRU + GRUCellV2 = GRUCell + LSTMV2 = LSTM + LSTMCellV2 = LSTMCell +else: + from tensorflow.python.keras.layers.recurrent import GRU + from tensorflow.python.keras.layers.recurrent import GRUCell + from tensorflow.python.keras.layers.recurrent import LSTM + from tensorflow.python.keras.layers.recurrent import LSTMCell + from tensorflow.python.keras.layers.recurrent_v2 import GRU as GRUV2 + from tensorflow.python.keras.layers.recurrent_v2 import GRUCell as GRUCellV2 + from tensorflow.python.keras.layers.recurrent_v2 import LSTM as LSTMV2 + from tensorflow.python.keras.layers.recurrent_v2 import LSTMCell as LSTMCellV2 + GRUV1 = GRU + GRUCellV1 = GRUCell + LSTMV1 = LSTM + LSTMCellV1 = LSTMCell # Convolutional-recurrent layers. from tensorflow.python.keras.layers.convolutional_recurrent import ConvLSTM2D diff --git a/tensorflow/python/keras/layers/cudnn_recurrent_test.py b/tensorflow/python/keras/layers/cudnn_recurrent_test.py index e3e193c3b63252..1c20918ffc8e95 100644 --- a/tensorflow/python/keras/layers/cudnn_recurrent_test.py +++ b/tensorflow/python/keras/layers/cudnn_recurrent_test.py @@ -460,7 +460,7 @@ def test_preprocess_weights_for_loading_gru_incompatible(self): input_shape = (3, 5) def gru(cudnn=False, **kwargs): - layer_class = keras.layers.CuDNNGRU if cudnn else keras.layers.GRU + layer_class = keras.layers.CuDNNGRU if cudnn else keras.layers.GRUV1 return layer_class(2, input_shape=input_shape, **kwargs) def get_layer_weights(layer): diff --git a/tensorflow/python/keras/layers/rnn_cell_wrapper_v2_test.py b/tensorflow/python/keras/layers/rnn_cell_wrapper_v2_test.py index 15cbf68c87a643..a01e56be09797b 100644 --- a/tensorflow/python/keras/layers/rnn_cell_wrapper_v2_test.py +++ b/tensorflow/python/keras/layers/rnn_cell_wrapper_v2_test.py @@ -256,7 +256,7 @@ def testDroputWrapperWithKerasLSTMCell(self): with 
self.assertRaisesRegexp(ValueError, "does not work with "): wrapper_cls(cell) - cell = layers.LSTMCell_v2(10) + cell = layers.LSTMCellV2(10) with self.assertRaisesRegexp(ValueError, "does not work with "): wrapper_cls(cell) diff --git a/tensorflow/python/keras/saving/hdf5_format_test.py b/tensorflow/python/keras/saving/hdf5_format_test.py index 96557410030ad4..19340c1d86dda8 100644 --- a/tensorflow/python/keras/saving/hdf5_format_test.py +++ b/tensorflow/python/keras/saving/hdf5_format_test.py @@ -145,7 +145,7 @@ def test_weight_preprocessing(self): (None, input_dim, 4, 4, 4), ], [ - (keras.layers.GRU(output_dim)), + (keras.layers.GRUV1(output_dim)), [np.random.random((input_dim, output_dim)), np.random.random((output_dim, output_dim)), np.random.random((output_dim,)), @@ -158,7 +158,7 @@ def test_weight_preprocessing(self): (None, 4, input_dim), ], [ - (keras.layers.LSTM(output_dim)), + (keras.layers.LSTMV1(output_dim)), [np.random.random((input_dim, output_dim)), np.random.random((output_dim, output_dim)), np.random.random((output_dim,)), diff --git a/tensorflow/python/layers/normalization.py b/tensorflow/python/layers/normalization.py index c6f06069d7c9a6..2554721eca25b3 100644 --- a/tensorflow/python/layers/normalization.py +++ b/tensorflow/python/layers/normalization.py @@ -20,7 +20,7 @@ from __future__ import print_function -from tensorflow.python.keras import layers as keras_layers +from tensorflow.python.keras.layers import normalization as keras_normalization from tensorflow.python.layers import base from tensorflow.python.ops import init_ops from tensorflow.python.util import deprecation @@ -28,7 +28,7 @@ @tf_export(v1=['layers.BatchNormalization']) -class BatchNormalization(keras_layers.BatchNormalization, base.Layer): +class BatchNormalization(keras_normalization.BatchNormalization, base.Layer): """Batch Normalization layer from http://arxiv.org/abs/1502.03167. 
"Batch Normalization: Accelerating Deep Network Training by Reducing @@ -170,7 +170,7 @@ def call(self, inputs, training=False): @deprecation.deprecated( date=None, instructions='Use keras.layers.BatchNormalization instead. In ' 'particular, `tf.control_dependencies(tf.GraphKeys.UPDATE_OPS)` should not ' - 'be used (consult the `tf.keras.layers.batch_normalization` ' + 'be used (consult the `tf.keras.layers.BatchNormalization` ' 'documentation).') @tf_export(v1=['layers.batch_normalization']) def batch_normalization(inputs, From 4d0c7e456231024f5c613dbc59d2287b9a00a74c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franc=CC=A7ois=20Chollet?= Date: Thu, 12 Dec 2019 15:46:25 -0800 Subject: [PATCH 086/130] Update release notes. --- RELEASE.md | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/RELEASE.md b/RELEASE.md index 281f755c5c07f5..289b2b7c3d8809 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -6,18 +6,15 @@ TensorFlow 2.1 will be the last TF release supporting Python 2. Python 2 support * The `tensorflow` pip package now includes GPU support by default (same as `tensorflow-gpu`) for both Linux and Windows. This runs on machines with and without NVIDIA GPUs. `tensorflow-gpu` is still available, and CPU-only packages can be downloaded at `tensorflow-cpu` for users who are concerned about package size. * The `tensorflow` pip package is built with CUDA 10.1 and cuDNN 7.6. * `tf.keras` - * `Model.fit_generator`, `Model.evaluate_generator`, `Model.predict_generator`, `Model.train_on_batch`, `Model.test_on_batch`, and `Model.predict_on_batch` methods now respect the `run_eagerly` property, and will correctly run using tf.function by default. - * `Model.fit_generator`, `Model.evaluate_generator`, and `Model.predict_generator` are deprecated endpoints. They are subsumed by `Model.fit`, `Model.evaluate`, and `Model.predict` which now support generators and Sequences. 
- * Keras `.compile` `.fit` `.evaluate` and `.predict` are allowed to be outside of the DistributionStrategy scope, as long as the model was constructed inside of a scope. - * Keras `model.load_weights` now accepts `skip_mismatch` as an argument. This was available in external Keras, and has now been copied over to `tf.keras`. + * Experimental support for mixed precision is available on GPUs and Cloud TPUs. See [usage guide](https://www.tensorflow.org/guide/keras/mixed_precision). * Introduced the `TextVectorization` layer, which takes as input raw strings and takes care of text standardization, tokenization, n-gram generation, and vocabulary indexing. See this [end-to-end text classification example](https://colab.research.google.com/drive/1RvCnR7h0_l4Ekn5vINWToI9TNJdpUZB3). + * Keras `.compile` `.fit` `.evaluate` and `.predict` are allowed to be outside of the DistributionStrategy scope, as long as the model was constructed inside of a scope. * Experimental support for Keras `.compile`, `.fit`, `.evaluate`, and `.predict` is available for Cloud TPU Pods. * Automatic outside compilation is now enabled for Cloud TPUs. This allows `tf.summary` to be used more conveniently with Cloud TPUs. * Dynamic batch sizes with DistributionStrategy and Keras are supported on Cloud TPUs. - * Experimental support for mixed precision is available on GPUs and Cloud TPUs. * Keras reference implementations for many popular models are available in the TensorFlow [Model Garden](https://github.com/tensorflow/models/tree/master/official). * `tf.data` - * Changes rebatching for `tf.data datasets` + distribution strategies for better performance. Note that the dataset also behaves slightly differently, in that the rebatched dataset cardinality will always be a multiple of the number of replicas. + * Changes rebatching for `tf.data datasets` + DistributionStrategy for better performance. 
Note that the dataset also behaves slightly differently, in that the rebatched dataset cardinality will always be a multiple of the number of replicas. * `TensorRT` * [TensorRT 6.0](https://developer.nvidia.com/tensorrt#tensorrt-whats-new) is now supported and enabled by default. This adds support for more TensorFlow ops including Conv3D, Conv3DBackpropInputV2, AvgPool3D, MaxPool3D, ResizeBilinear, and ResizeNearestNeighbor. In addition, the TensorFlow-TensorRT python conversion API is exported as `tf.experimental.tensorrt.Converter`. * Environment variable `TF_DETERMINISTIC_OPS` added. When set to "true" or "1", this environment variable makes `tf.nn.bias_add` operate deterministically (i.e. reproducibly) when XLA JIT compilation is *not* enabled. It also makes cuDNN convolution and max-pooling operate deterministically. This makes Keras Conv*D and MaxPool*D layers operate deterministically in both the forward and backward directions when running on a CUDA-enabled GPU. @@ -36,7 +33,7 @@ Because of [issues with building on windows](https://github.com/tensorflow/tenso ## Bug Fixes and Other Changes * `tf.data` - * Fixes concurrency issue with `tf.data.experimental.parallel_interleave` with sloppy=True. + * Fixes concurrency issue with `tf.data.experimental.parallel_interleave` with `sloppy=True`. * Add `tf.data.experimental.dense_to_ragged_batch()`. * Extend `tf.data` parsing ops to support `RaggedTensors`. * `tf.distribute` @@ -45,9 +42,11 @@ Because of [issues with building on windows](https://github.com/tensorflow/tenso * Added option in `tf.estimator.CheckpointSaverHook` to not save the `GraphDef`. * Moving the checkpoint reader from swig to pybind11. * `tf.keras` - * Export depthwise_conv2d in `tf.keras.backend`. + * Export `depthwise_conv2d` in `tf.keras.backend`. * In Keras Layers and Models, Variables in `trainable_weights`, `non_trainable_weights`, and `weights` are explicitly deduplicated. 
- * Fix the incorrect stateful behavior of Keras convolutional layers. + * Keras `model.load_weights` now accepts `skip_mismatch` as an argument. This was available in external Keras, and has now been copied over to `tf.keras`. + * Fix the input shape caching behavior of Keras convolutional layers. + * `Model.fit_generator`, `Model.evaluate_generator`, `Model.predict_generator`, `Model.train_on_batch`, `Model.test_on_batch`, and `Model.predict_on_batch` methods now respect the `run_eagerly` property, and will correctly run using `tf.function` by default. Note that `Model.fit_generator`, `Model.evaluate_generator`, and `Model.predict_generator` are deprecated endpoints. They are subsumed by `Model.fit`, `Model.evaluate`, and `Model.predict` which now support generators and Sequences. * `tf.lite` * Legalization for `NMS` ops in TFLite. * add `narrow_range` and `axis` to `quantize_v2` and `dequantize` ops. From 2be67201469da89a0c0a2f74d61f4569a20716db Mon Sep 17 00:00:00 2001 From: Austin Anderson Date: Fri, 13 Dec 2019 14:51:04 -0800 Subject: [PATCH 087/130] Add msvcp140_1.dll to list of import-time-check Windows DLLs Resolves https://github.com/tensorflow/tensorflow/issues/35036 For TensorFlow 2.1.0rc1, the TensorFlow team built Windows packages with Microsoft Visual Studio 2019 16.4, upgraded from Visual Studio 2017. As discovered in the issue linked above, this caused an import error for Windows TF Python whls, because the build upgrade pulled in an additional Visual C++ DLL dependency, `msvcp140_1.dll`, which can be found in the latest Visual C++ package for all Visual Studio releases since 2015 (https://support.microsoft.com/en-us/help/2977003/the-latest-supported-visual-c-downloads). I discovered the missing DLL by unpacking the two wheels for rc0 and rc1 and separately running `dumpbin /DEPENDENTS tensorflow_core/python/_pywrap_tensorflow_internal.pyd` (thanks to @yifeif for help with this!). 
In this change, I've updated the import-time checker to look for both `msvcp140_1.dll` and `msvcp140.dll` in a way that supports simple future additions to the list. PiperOrigin-RevId: 285476568 Change-Id: Ia9727e50801a4ddad1ea30653a74478fb7aee4e8 --- tensorflow/python/platform/self_check.py | 25 ++++++++++++++---------- tensorflow/tensorflow.bzl | 2 +- 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/tensorflow/python/platform/self_check.py b/tensorflow/python/platform/self_check.py index 33aed306467dc8..f6cf7705e1390d 100644 --- a/tensorflow/python/platform/self_check.py +++ b/tensorflow/python/platform/self_check.py @@ -42,17 +42,22 @@ def preload_check(): # we load the Python extension, so that we can raise an actionable error # message if they are not found. import ctypes # pylint: disable=g-import-not-at-top - if hasattr(build_info, "msvcp_dll_name"): - try: - ctypes.WinDLL(build_info.msvcp_dll_name) - except OSError: + if hasattr(build_info, "msvcp_dll_names"): + missing = [] + for dll_name in build_info.msvcp_dll_names.split(","): + try: + ctypes.WinDLL(dll_name) + except OSError: + missing.append(dll_name) + if missing: raise ImportError( - "Could not find %r. TensorFlow requires that this DLL be " - "installed in a directory that is named in your %%PATH%% " - "environment variable. You may install this DLL by downloading " - "Visual C++ 2015 Redistributable Update 3 from this URL: " - "https://www.microsoft.com/en-us/download/details.aspx?id=53587" - % build_info.msvcp_dll_name) + "Could not find the DLL(s) %r. TensorFlow requires that these DLLs " + "be installed in a directory that is named in your %%PATH%% " + "environment variable. 
You may install these DLLs by downloading " + '"Microsoft C++ Redistributable for Visual Studio 2015, 2017 and ' + '2019" for your platform from this URL: ' + "https://support.microsoft.com/help/2977003/the-latest-supported-visual-c-downloads" + % " or ".join(missing)) else: # TODO(mrry): Consider adding checks for the Linux and Mac OS X builds. pass diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index dfa7192ba84a32..954f06984b08bd 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -2368,7 +2368,7 @@ def tf_py_build_info_genrule(name, out, **kwargs): " --is_config_rocm " + if_rocm("True", "False") + " --key_value " + if_cuda(" cuda_version_number=$${TF_CUDA_VERSION:-} cudnn_version_number=$${TF_CUDNN_VERSION:-} ", "") + - if_windows(" msvcp_dll_name=msvcp140.dll ", "") + + if_windows(" msvcp_dll_names=msvcp140.dll,msvcp140_1.dll ", "") + if_windows_cuda(" ".join([ "nvcuda_dll_name=nvcuda.dll", "cudart_dll_name=cudart64_$$(echo $${TF_CUDA_VERSION:-} | sed \"s/\\.//\").dll", From 43d39964b497afd9eb54a3cb779e818cd2479702 Mon Sep 17 00:00:00 2001 From: Austin Anderson Date: Mon, 16 Dec 2019 12:06:56 -0800 Subject: [PATCH 088/130] Document new Windows DLL requirements --- RELEASE.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/RELEASE.md b/RELEASE.md index 281f755c5c07f5..5ecf74bd1a47a1 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -4,6 +4,10 @@ TensorFlow 2.1 will be the last TF release supporting Python 2. Python 2 support ## Major Features and Improvements * The `tensorflow` pip package now includes GPU support by default (same as `tensorflow-gpu`) for both Linux and Windows. This runs on machines with and without NVIDIA GPUs. `tensorflow-gpu` is still available, and CPU-only packages can be downloaded at `tensorflow-cpu` for users who are concerned about package size. 
+* **Windows users:** officially-released `tensorflow` Pip packages are now built with Visual + Studio 2019 version 16.4 in order to take advantage of the new `/d2ReducedOptimizeHugeFunctions` compiler flag. To use these new packages, you must install "Microsoft Visual C++ Redistributable for Visual Studio 2015, 2017 and 2019", available from Microsoft's website [here](https://support.microsoft.com/help/2977003/the-latest-supported-visual-c-downloads). + * This does not change the minimum required version for building TensorFlow from source on Windows. + * If the required DLLs, `msvcp140.dll` (old) and `msvcp140_1.dll` (new), are missing on your machine, `import tensorflow` will print a warning message. * The `tensorflow` pip package is built with CUDA 10.1 and cuDNN 7.6. * `tf.keras` * `Model.fit_generator`, `Model.evaluate_generator`, `Model.predict_generator`, `Model.train_on_batch`, `Model.test_on_batch`, and `Model.predict_on_batch` methods now respect the `run_eagerly` property, and will correctly run using tf.function by default. From 79d993edbe21b51f390474ea81d404356e1cf487 Mon Sep 17 00:00:00 2001 From: Austin Anderson Date: Mon, 16 Dec 2019 14:16:11 -0800 Subject: [PATCH 089/130] Clarify installation messages --- RELEASE.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/RELEASE.md b/RELEASE.md index 5ecf74bd1a47a1..f2ea738a3b59d4 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -4,10 +4,10 @@ TensorFlow 2.1 will be the last TF release supporting Python 2. Python 2 support ## Major Features and Improvements * The `tensorflow` pip package now includes GPU support by default (same as `tensorflow-gpu`) for both Linux and Windows. This runs on machines with and without NVIDIA GPUs. `tensorflow-gpu` is still available, and CPU-only packages can be downloaded at `tensorflow-cpu` for users who are concerned about package size. 
-* **Windows users:** officially-released `tensorflow` Pip packages are now built with Visual +* **Windows users:** Officially-released `tensorflow` Pip packages are now built with Visual Studio 2019 version 16.4 in order to take advantage of the new `/d2ReducedOptimizeHugeFunctions` compiler flag. To use these new packages, you must install "Microsoft Visual C++ Redistributable for Visual Studio 2015, 2017 and 2019", available from Microsoft's website [here](https://support.microsoft.com/help/2977003/the-latest-supported-visual-c-downloads). - * This does not change the minimum required version for building TensorFlow from source on Windows. - * If the required DLLs, `msvcp140.dll` (old) and `msvcp140_1.dll` (new), are missing on your machine, `import tensorflow` will print a warning message. + * This does not change the minimum required version for building TensorFlow from source on Windows, but builds enabling `EIGEN_STRONG_INLINE` can take over 48 hours to compile without this flag. Refer to `configure.py` for more information about `EIGEN_STRONG_INLINE` and `/d2ReducedOptimizeHugeFunctions`. + * If either of the required DLLs, `msvcp140.dll` (old) or `msvcp140_1.dll` (new), are missing on your machine, `import tensorflow` will print a warning message. * The `tensorflow` pip package is built with CUDA 10.1 and cuDNN 7.6. * `tf.keras` * `Model.fit_generator`, `Model.evaluate_generator`, `Model.predict_generator`, `Model.train_on_batch`, `Model.test_on_batch`, and `Model.predict_on_batch` methods now respect the `run_eagerly` property, and will correctly run using tf.function by default. 
From 1696086a603808db771aae2ac411493c1bd2fc94 Mon Sep 17 00:00:00 2001 From: Austin Anderson Date: Mon, 16 Dec 2019 14:18:44 -0800 Subject: [PATCH 090/130] Return to long lines --- RELEASE.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/RELEASE.md b/RELEASE.md index f2ea738a3b59d4..7688a90b88f875 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -4,8 +4,7 @@ TensorFlow 2.1 will be the last TF release supporting Python 2. Python 2 support ## Major Features and Improvements * The `tensorflow` pip package now includes GPU support by default (same as `tensorflow-gpu`) for both Linux and Windows. This runs on machines with and without NVIDIA GPUs. `tensorflow-gpu` is still available, and CPU-only packages can be downloaded at `tensorflow-cpu` for users who are concerned about package size. -* **Windows users:** Officially-released `tensorflow` Pip packages are now built with Visual - Studio 2019 version 16.4 in order to take advantage of the new `/d2ReducedOptimizeHugeFunctions` compiler flag. To use these new packages, you must install "Microsoft Visual C++ Redistributable for Visual Studio 2015, 2017 and 2019", available from Microsoft's website [here](https://support.microsoft.com/help/2977003/the-latest-supported-visual-c-downloads). +* **Windows users:** Officially-released `tensorflow` Pip packages are now built with Visual Studio 2019 version 16.4 in order to take advantage of the new `/d2ReducedOptimizeHugeFunctions` compiler flag. To use these new packages, you must install "Microsoft Visual C++ Redistributable for Visual Studio 2015, 2017 and 2019", available from Microsoft's website [here](https://support.microsoft.com/help/2977003/the-latest-supported-visual-c-downloads). * This does not change the minimum required version for building TensorFlow from source on Windows, but builds enabling `EIGEN_STRONG_INLINE` can take over 48 hours to compile without this flag. 
Refer to `configure.py` for more information about `EIGEN_STRONG_INLINE` and `/d2ReducedOptimizeHugeFunctions`. * If either of the required DLLs, `msvcp140.dll` (old) or `msvcp140_1.dll` (new), are missing on your machine, `import tensorflow` will print a warning message. * The `tensorflow` pip package is built with CUDA 10.1 and cuDNN 7.6. From 846714eb4ec1a78b3ea2485ba1a0094c9363026b Mon Sep 17 00:00:00 2001 From: Milan Straka Date: Wed, 11 Dec 2019 09:06:59 +0100 Subject: [PATCH 091/130] Pass experimental_relax_shapes to instance methods... decorated with `tf.function`. Currently the `experimental_relax_shapes` is not passed, so the instance methods ignore this argument. Fixes #34905. Note that the `experimental_relax_shapes` was named differently in `def_function.Function` and `function.Function`, so the one in `def_function` was renamed to start with underscore. It is a private field, so it should be fine. --- tensorflow/python/eager/def_function.py | 6 ++--- tensorflow/python/eager/def_function_test.py | 2 +- tensorflow/python/eager/function.py | 3 ++- tensorflow/python/eager/function_test.py | 23 ++++++++++++++++++++ 4 files changed, 29 insertions(+), 5 deletions(-) diff --git a/tensorflow/python/eager/def_function.py b/tensorflow/python/eager/def_function.py index 47e482cc754b79..68d8a3ead7e662 100644 --- a/tensorflow/python/eager/def_function.py +++ b/tensorflow/python/eager/def_function.py @@ -405,7 +405,7 @@ def embedding_matmul(a, b): self._implements = experimental_implements self._autograph = autograph self._experimental_autograph_options = experimental_autograph_options - self.experimental_relax_shapes = experimental_relax_shapes + self._experimental_relax_shapes = experimental_relax_shapes self._experimental_compile = experimental_compile self._created_variables = None # GUARDED_BY(self._lock) self._stateful_fn = None # GUARDED_BY(self._lock) @@ -458,7 +458,7 @@ def _defun(self, fn): attributes=attributes, autograph=self._autograph, 
experimental_autograph_options=self._experimental_autograph_options, - experimental_relax_shapes=self.experimental_relax_shapes) + experimental_relax_shapes=self._experimental_relax_shapes) def _initialize(self, args, kwds, add_initializers_to=None): """Initializes, on the first call. @@ -514,7 +514,7 @@ def _clone(self, python_function): autograph=self._autograph, experimental_implements=self._implements, experimental_autograph_options=self._experimental_autograph_options, - experimental_relax_shapes=self.experimental_relax_shapes, + experimental_relax_shapes=self._experimental_relax_shapes, experimental_compile=self._experimental_compile) def _decorate(self, decorator): diff --git a/tensorflow/python/eager/def_function_test.py b/tensorflow/python/eager/def_function_test.py index 0bebc89d2207e7..c7f8a25ae45aac 100644 --- a/tensorflow/python/eager/def_function_test.py +++ b/tensorflow/python/eager/def_function_test.py @@ -681,7 +681,7 @@ def testClone(self, input_signature, autograph, autograph_options, implements, self.assertEqual(autograph, cloned._autograph) self.assertEqual(implements, cloned._implements) self.assertEqual(autograph_options, cloned._experimental_autograph_options) - self.assertEqual(relax_shapes, cloned.experimental_relax_shapes) + self.assertEqual(relax_shapes, cloned._experimental_relax_shapes) self.assertEqual(compile_, cloned._experimental_compile) # This test does not run with XLA JIT support linked in so we can only check diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py index 9fdc282105b7d9..810fc86b30da1e 100644 --- a/tensorflow/python/eager/function.py +++ b/tensorflow/python/eager/function.py @@ -3219,7 +3219,8 @@ def bound_method_wrapper(*args, **kwargs): tf_decorator.make_decorator(bound_method, bound_method_wrapper), name=original_function._name, autograph=original_function._autograph, - input_signature=original_function.input_signature) + input_signature=original_function.input_signature, + 
experimental_relax_shapes=original_function._experimental_relax_shapes) # pylint: enable=protected-access # And we wrap the function with tf_decorator so inspection works correctly diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py index 20b21a478e491b..2653b4d31dde31 100644 --- a/tensorflow/python/eager/function_test.py +++ b/tensorflow/python/eager/function_test.py @@ -323,6 +323,29 @@ def func(a): self.assertTrue(unknown_dim[0]) self.assertLen(total_function_cache(func), 2) + def testInputShapeRelaxationOnInstanceMethod(self): + # Test that experimental_relax_shapes is passed during + # instance method bounding. + unknown_dim = [False] + + class Foo(object): + + @def_function.function(experimental_relax_shapes=True) + def func(self, a): + if a._shape_tuple()[0] is None: + unknown_dim[0] = True + return a + 1 + + foo = Foo() + foo.func(constant_op.constant([])) + self.assertFalse(unknown_dim[0]) + + foo.func(constant_op.constant([1.0])) + self.assertFalse(unknown_dim[0]) + + foo.func(constant_op.constant([1.0, 2.0])) + self.assertTrue(unknown_dim[0]) + def testCapturesVariables(self): a = variables.Variable(1.0, trainable=False) b = variables.Variable(1.0) From 4f49610f3b940fa1c7ec51a9a55eb3ff3e8b1d5b Mon Sep 17 00:00:00 2001 From: Mihai Maruseac Date: Wed, 18 Dec 2019 13:38:25 -0800 Subject: [PATCH 092/130] Revert "[r2.1 cherry-pick] Fix pip package API generation" --- tensorflow/api_template.__init__.py | 10 ++++------ tensorflow/api_template_v1.__init__.py | 9 ++++----- .../python/tools/api/generator/create_python_api.py | 12 +++--------- .../tools/api/generator/create_python_api_test.py | 10 +++++----- tensorflow/virtual_root_template_v1.__init__.py | 3 +++ tensorflow/virtual_root_template_v2.__init__.py | 10 ++++++++++ 6 files changed, 29 insertions(+), 25 deletions(-) diff --git a/tensorflow/api_template.__init__.py b/tensorflow/api_template.__init__.py index c515cc76b9aacd..56d65d45faf0b1 100644 --- 
a/tensorflow/api_template.__init__.py +++ b/tensorflow/api_template.__init__.py @@ -119,11 +119,11 @@ def _running_from_pip_package(): _current_file_location.startswith(dir_) for dir_ in _site_packages_dirs) if _running_from_pip_package(): - for _s in _site_packages_dirs: + for s in _site_packages_dirs: # TODO(gunan): Add sanity checks to loaded modules here. - _plugin_dir = _os.path.join(_s, 'tensorflow-plugins') - if _fi.file_exists(_plugin_dir): - _ll.load_library(_plugin_dir) + plugin_dir = _os.path.join(s, 'tensorflow-plugins') + if _fi.file_exists(plugin_dir): + _ll.load_library(plugin_dir) # Add module aliases if hasattr(_current_module, 'keras'): @@ -136,5 +136,3 @@ def _running_from_pip_package(): setattr(_current_module, "optimizers", optimizers) setattr(_current_module, "initializers", initializers) # pylint: enable=undefined-variable - -# __all__ PLACEHOLDER diff --git a/tensorflow/api_template_v1.__init__.py b/tensorflow/api_template_v1.__init__.py index 2b2899c3fe031e..97478a18b8a20a 100644 --- a/tensorflow/api_template_v1.__init__.py +++ b/tensorflow/api_template_v1.__init__.py @@ -132,10 +132,9 @@ def _running_from_pip_package(): _current_file_location.startswith(dir_) for dir_ in _site_packages_dirs) if _running_from_pip_package(): - for _s in _site_packages_dirs: + for s in _site_packages_dirs: # TODO(gunan): Add sanity checks to loaded modules here. 
- _plugin_dir = _os.path.join(_s, 'tensorflow-plugins') - if _fi.file_exists(_plugin_dir): - _ll.load_library(_plugin_dir) + plugin_dir = _os.path.join(s, 'tensorflow-plugins') + if _fi.file_exists(plugin_dir): + _ll.load_library(plugin_dir) -# __all__ PLACEHOLDER diff --git a/tensorflow/python/tools/api/generator/create_python_api.py b/tensorflow/python/tools/api/generator/create_python_api.py index 80f663683c3ee0..3af677322d67ee 100644 --- a/tensorflow/python/tools/api/generator/create_python_api.py +++ b/tensorflow/python/tools/api/generator/create_python_api.py @@ -243,12 +243,11 @@ def build(self): # from it using * import. Don't need this for lazy_loading because the # underscore symbols are already included in __all__ when passed in and # handled by TFModuleWrapper. - root_module_footer = '' if not self._lazy_loading: underscore_names_str = ', '.join( '\'%s\'' % name for name in self._underscore_names_in_root) - root_module_footer = ''' + module_text_map[''] = module_text_map.get('', '') + ''' _names_with_underscore = [%s] __all__ = [_s for _s in dir() if not _s.startswith('_')] __all__.extend([_s for _s in _names_with_underscore]) @@ -274,7 +273,7 @@ def build(self): footer_text_map[dest_module] = _DEPRECATION_FOOTER % ( dest_module, public_apis_name, deprecation, has_lite) - return module_text_map, footer_text_map, root_module_footer + return module_text_map, footer_text_map def format_import(self, source_module_name, source_name, dest_name): """Formats import statement. 
@@ -621,11 +620,7 @@ def create_api_files(output_files, packages, root_init_template, output_dir, os.makedirs(os.path.dirname(file_path)) open(file_path, 'a').close() - ( - module_text_map, - deprecation_footer_map, - root_module_footer, - ) = get_api_init_text( + module_text_map, deprecation_footer_map = get_api_init_text( packages, output_package, api_name, api_version, compat_api_versions, lazy_loading, use_relative_imports) @@ -657,7 +652,6 @@ def create_api_files(output_files, packages, root_init_template, output_dir, with open(root_init_template, 'r') as root_init_template_file: contents = root_init_template_file.read() contents = contents.replace('# API IMPORTS PLACEHOLDER', text) - contents = contents.replace('# __all__ PLACEHOLDER', root_module_footer) elif module in compat_module_to_template: # Read base init file for compat module with open(compat_module_to_template[module], 'r') as init_template_file: diff --git a/tensorflow/python/tools/api/generator/create_python_api_test.py b/tensorflow/python/tools/api/generator/create_python_api_test.py index 76404d6c82b33b..010f189dcb27f0 100644 --- a/tensorflow/python/tools/api/generator/create_python_api_test.py +++ b/tensorflow/python/tools/api/generator/create_python_api_test.py @@ -62,7 +62,7 @@ def tearDown(self): del sys.modules[_MODULE_NAME] def testFunctionImportIsAdded(self): - imports, _, _ = create_python_api.get_api_init_text( + imports, _ = create_python_api.get_api_init_text( packages=[create_python_api._DEFAULT_PACKAGE], output_package='tensorflow', api_name='tensorflow', @@ -97,7 +97,7 @@ def testFunctionImportIsAdded(self): msg='compat.v1 in %s' % str(imports.keys())) def testClassImportIsAdded(self): - imports, _, _ = create_python_api.get_api_init_text( + imports, _ = create_python_api.get_api_init_text( packages=[create_python_api._DEFAULT_PACKAGE], output_package='tensorflow', api_name='tensorflow', @@ -116,7 +116,7 @@ def testClassImportIsAdded(self): msg='%s not in %s' % (expected_import, 
str(imports))) def testConstantIsAdded(self): - imports, _, _ = create_python_api.get_api_init_text( + imports, _ = create_python_api.get_api_init_text( packages=[create_python_api._DEFAULT_PACKAGE], output_package='tensorflow', api_name='tensorflow', @@ -132,7 +132,7 @@ def testConstantIsAdded(self): msg='%s not in %s' % (expected, str(imports))) def testCompatModuleIsAdded(self): - imports, _, _ = create_python_api.get_api_init_text( + imports, _ = create_python_api.get_api_init_text( packages=[create_python_api._DEFAULT_PACKAGE], output_package='tensorflow', api_name='tensorflow', @@ -144,7 +144,7 @@ def testCompatModuleIsAdded(self): msg='compat.v1.test not in %s' % str(imports.keys())) def testNestedCompatModulesAreAdded(self): - imports, _, _ = create_python_api.get_api_init_text( + imports, _ = create_python_api.get_api_init_text( packages=[create_python_api._DEFAULT_PACKAGE], output_package='tensorflow', api_name='tensorflow', diff --git a/tensorflow/virtual_root_template_v1.__init__.py b/tensorflow/virtual_root_template_v1.__init__.py index 9a45bc0355d0b7..236e9f52258973 100644 --- a/tensorflow/virtual_root_template_v1.__init__.py +++ b/tensorflow/virtual_root_template_v1.__init__.py @@ -132,4 +132,7 @@ def _forward_module(old_name): except NameError: pass +# Manually patch keras and estimator so tf.keras and tf.estimator work +keras = _sys.modules["tensorflow.keras"] +if not _root_estimator: estimator = _sys.modules["tensorflow.estimator"] # LINT.ThenChange(//tensorflow/virtual_root_template_v2.__init__.py.oss) diff --git a/tensorflow/virtual_root_template_v2.__init__.py b/tensorflow/virtual_root_template_v2.__init__.py index bd8c903e455db5..83c020182a8ee9 100644 --- a/tensorflow/virtual_root_template_v2.__init__.py +++ b/tensorflow/virtual_root_template_v2.__init__.py @@ -126,4 +126,14 @@ def _forward_module(old_name): except NameError: pass +# TODO(mihaimaruseac): Revisit all of this once we release 2.1 +# Manually patch keras and estimator so tf.keras 
and tf.estimator work +keras = _sys.modules["tensorflow.keras"] +if not _root_estimator: estimator = _sys.modules["tensorflow.estimator"] +# Also import module aliases +try: + from tensorflow_core import losses, metrics, initializers, optimizers +except ImportError: + pass + # LINT.ThenChange(//tensorflow/virtual_root_template_v1.__init__.py.oss) From 5d28926aba5c40f6d6e9c7034c1f631b3e2cdcac Mon Sep 17 00:00:00 2001 From: Milan Straka Date: Wed, 11 Dec 2019 09:06:59 +0100 Subject: [PATCH 093/130] Pass experimental_relax_shapes to instance methods... decorated with `tf.function`. Currently the `experimental_relax_shapes` is not passed, so the instance methods ignore this argument. Fixes #34905. Note that the `experimental_relax_shapes` was named differently in `def_function.Function` and `function.Function`, so the one in `def_function` was renamed to start with underscore. It is a private field, so it should be fine. --- tensorflow/python/eager/def_function.py | 6 ++--- tensorflow/python/eager/def_function_test.py | 2 +- tensorflow/python/eager/function.py | 3 ++- tensorflow/python/eager/function_test.py | 23 ++++++++++++++++++++ 4 files changed, 29 insertions(+), 5 deletions(-) diff --git a/tensorflow/python/eager/def_function.py b/tensorflow/python/eager/def_function.py index 47e482cc754b79..68d8a3ead7e662 100644 --- a/tensorflow/python/eager/def_function.py +++ b/tensorflow/python/eager/def_function.py @@ -405,7 +405,7 @@ def embedding_matmul(a, b): self._implements = experimental_implements self._autograph = autograph self._experimental_autograph_options = experimental_autograph_options - self.experimental_relax_shapes = experimental_relax_shapes + self._experimental_relax_shapes = experimental_relax_shapes self._experimental_compile = experimental_compile self._created_variables = None # GUARDED_BY(self._lock) self._stateful_fn = None # GUARDED_BY(self._lock) @@ -458,7 +458,7 @@ def _defun(self, fn): attributes=attributes, autograph=self._autograph, 
experimental_autograph_options=self._experimental_autograph_options, - experimental_relax_shapes=self.experimental_relax_shapes) + experimental_relax_shapes=self._experimental_relax_shapes) def _initialize(self, args, kwds, add_initializers_to=None): """Initializes, on the first call. @@ -514,7 +514,7 @@ def _clone(self, python_function): autograph=self._autograph, experimental_implements=self._implements, experimental_autograph_options=self._experimental_autograph_options, - experimental_relax_shapes=self.experimental_relax_shapes, + experimental_relax_shapes=self._experimental_relax_shapes, experimental_compile=self._experimental_compile) def _decorate(self, decorator): diff --git a/tensorflow/python/eager/def_function_test.py b/tensorflow/python/eager/def_function_test.py index 0bebc89d2207e7..c7f8a25ae45aac 100644 --- a/tensorflow/python/eager/def_function_test.py +++ b/tensorflow/python/eager/def_function_test.py @@ -681,7 +681,7 @@ def testClone(self, input_signature, autograph, autograph_options, implements, self.assertEqual(autograph, cloned._autograph) self.assertEqual(implements, cloned._implements) self.assertEqual(autograph_options, cloned._experimental_autograph_options) - self.assertEqual(relax_shapes, cloned.experimental_relax_shapes) + self.assertEqual(relax_shapes, cloned._experimental_relax_shapes) self.assertEqual(compile_, cloned._experimental_compile) # This test does not run with XLA JIT support linked in so we can only check diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py index 9fdc282105b7d9..810fc86b30da1e 100644 --- a/tensorflow/python/eager/function.py +++ b/tensorflow/python/eager/function.py @@ -3219,7 +3219,8 @@ def bound_method_wrapper(*args, **kwargs): tf_decorator.make_decorator(bound_method, bound_method_wrapper), name=original_function._name, autograph=original_function._autograph, - input_signature=original_function.input_signature) + input_signature=original_function.input_signature, + 
experimental_relax_shapes=original_function._experimental_relax_shapes) # pylint: enable=protected-access # And we wrap the function with tf_decorator so inspection works correctly diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py index 20b21a478e491b..2653b4d31dde31 100644 --- a/tensorflow/python/eager/function_test.py +++ b/tensorflow/python/eager/function_test.py @@ -323,6 +323,29 @@ def func(a): self.assertTrue(unknown_dim[0]) self.assertLen(total_function_cache(func), 2) + def testInputShapeRelaxationOnInstanceMethod(self): + # Test that experimental_relax_shapes is passed during + # instance method bounding. + unknown_dim = [False] + + class Foo(object): + + @def_function.function(experimental_relax_shapes=True) + def func(self, a): + if a._shape_tuple()[0] is None: + unknown_dim[0] = True + return a + 1 + + foo = Foo() + foo.func(constant_op.constant([])) + self.assertFalse(unknown_dim[0]) + + foo.func(constant_op.constant([1.0])) + self.assertFalse(unknown_dim[0]) + + foo.func(constant_op.constant([1.0, 2.0])) + self.assertTrue(unknown_dim[0]) + def testCapturesVariables(self): a = variables.Variable(1.0, trainable=False) b = variables.Variable(1.0) From c97b2206163ec6fda0eac77ef26b095be0d0651d Mon Sep 17 00:00:00 2001 From: Milan Straka Date: Wed, 11 Dec 2019 08:40:37 +0100 Subject: [PATCH 094/130] Relax shapes for Keras _on_batch functions. The current train,test,predict_on_batch functions use a regular tf.function when not in eager mode, which causes a retrace for every new batch size. Similarly, if sequences are passed on input, every different sequence size causes a retrace. Passing experimental_relax_shapes=True allows these cases to be handled gracefully. Fixes #34907. 
--- tensorflow/python/keras/engine/training_v2_utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/keras/engine/training_v2_utils.py b/tensorflow/python/keras/engine/training_v2_utils.py index 596792e5fe571d..04b058949fa620 100644 --- a/tensorflow/python/keras/engine/training_v2_utils.py +++ b/tensorflow/python/keras/engine/training_v2_utils.py @@ -119,7 +119,9 @@ def _make_on_batch_function(model, mode): func = model if not model.run_eagerly: - func = def_function.function(func) + # Pass `experimental_relax_shapes` to avoid retracing for dynamic batch size, + # variable length sequences, etc. + func = def_function.function(func, experimental_relax_shapes=True) return func From 2b8a5652a43c700dfc656d8ac0807dab7daecc8d Mon Sep 17 00:00:00 2001 From: Jeff Daily Date: Mon, 2 Dec 2019 19:01:13 +0000 Subject: [PATCH 095/130] eigen patch needed for HIP header changes --- third_party/eigen3/gpu_packet_math.patch | 159 ++++++++++++++++++++++- 1 file changed, 158 insertions(+), 1 deletion(-) diff --git a/third_party/eigen3/gpu_packet_math.patch b/third_party/eigen3/gpu_packet_math.patch index 50ac056df79a68..1b6131abd41bc2 100644 --- a/third_party/eigen3/gpu_packet_math.patch +++ b/third_party/eigen3/gpu_packet_math.patch @@ -22,4 +22,161 @@ return res; } }; - \ No newline at end of file +--- a/unsupported/Eigen/SpecialFunctions ++++ b/unsupported/Eigen/SpecialFunctions +@@ -48,6 +48,9 @@ + } + + #include "src/SpecialFunctions/SpecialFunctionsImpl.h" ++#if defined(EIGEN_HIPCC) ++#include "src/SpecialFunctions/HipVectorCompatibility.h" ++#endif + #include "src/SpecialFunctions/SpecialFunctionsPacketMath.h" + #include "src/SpecialFunctions/SpecialFunctionsHalf.h" + #include "src/SpecialFunctions/SpecialFunctionsFunctors.h" +--- /dev/null ++++ b/unsupported/Eigen/src/SpecialFunctions/HipVectorCompatibility.h +@@ -0,0 +1,143 @@ ++#ifndef HIP_VECTOR_COMPATIBILITY_H ++#define HIP_VECTOR_COMPATIBILITY_H ++ ++namespace hip_impl { ++ 
template struct Scalar_accessor; ++} // end namespace hip_impl ++ ++namespace Eigen { ++namespace internal { ++ ++#if EIGEN_HAS_C99_MATH ++template ++struct lgamma_impl> : lgamma_impl {}; ++#endif ++ ++template ++struct digamma_impl_maybe_poly> ++ : digamma_impl_maybe_poly {}; ++ ++template ++struct digamma_impl> : digamma_impl {}; ++ ++#if EIGEN_HAS_C99_MATH ++template ++struct erf_impl> : erf_impl {}; ++#endif // EIGEN_HAS_C99_MATH ++ ++#if EIGEN_HAS_C99_MATH ++template ++struct erfc_impl> : erfc_impl {}; ++#endif // EIGEN_HAS_C99_MATH ++ ++#if EIGEN_HAS_C99_MATH ++template ++struct ndtri_impl> : ndtri_impl {}; ++#endif // EIGEN_HAS_C99_MATH ++ ++template ++struct igammac_cf_impl, mode> ++ : igammac_cf_impl {}; ++ ++template ++struct igamma_series_impl, mode> ++ : igamma_series_impl {}; ++ ++#if EIGEN_HAS_C99_MATH ++template ++struct igammac_impl> : igammac_impl {}; ++#endif // EIGEN_HAS_C99_MATH ++ ++#if EIGEN_HAS_C99_MATH ++template ++struct igamma_generic_impl, mode> ++ : igamma_generic_impl {}; ++#endif // EIGEN_HAS_C99_MATH ++ ++template ++struct igamma_impl> : igamma_impl {}; ++ ++template ++struct igamma_der_a_retval> ++ : igamma_der_a_retval {}; ++ ++template ++struct igamma_der_a_impl> ++ : igamma_der_a_impl {}; ++ ++template ++struct gamma_sample_der_alpha_retval> ++ : gamma_sample_der_alpha_retval {}; ++ ++template ++struct gamma_sample_der_alpha_impl> ++ : gamma_sample_der_alpha_impl {}; ++ ++template ++struct zeta_impl_series> ++ : zeta_impl_series {}; ++ ++template ++struct zeta_impl> : zeta_impl {}; ++ ++#if EIGEN_HAS_C99_MATH ++template ++struct polygamma_impl> ++ : polygamma_impl {}; ++#endif // EIGEN_HAS_C99_MATH ++ ++#if EIGEN_HAS_C99_MATH ++template ++struct betainc_impl> : betainc_impl {}; ++ ++template ++struct incbeta_cfe> : incbeta_cfe {}; ++ ++template ++struct betainc_helper> ++ : betainc_helper {}; ++#else ++template ++struct betainc_impl> : betainc_impl {}; ++#endif // EIGEN_HAS_C99_MATH ++ ++template ++struct bessel_i0e_impl> : 
bessel_i0e_impl {}; ++ ++template ++struct bessel_i0_impl> : bessel_i0_impl {}; ++ ++template ++struct bessel_i1e_impl> : bessel_i1e_impl {}; ++ ++template ++struct bessel_i1_impl> : bessel_i1_impl {}; ++ ++template ++struct bessel_k0e_impl> : bessel_k0e_impl {}; ++ ++template ++struct bessel_k0_impl> : bessel_k0_impl {}; ++ ++template ++struct bessel_k1e_impl> : bessel_k1e_impl {}; ++ ++template ++struct bessel_k1_impl> : bessel_k1_impl {}; ++ ++template ++struct bessel_j0_impl> : bessel_j0_impl {}; ++ ++template ++struct bessel_y0_impl> : bessel_y0_impl {}; ++ ++template ++struct bessel_j1_impl> : bessel_j1_impl {}; ++ ++template ++struct bessel_y1_impl> : bessel_y1_impl {}; ++ ++} // end namespace internal ++} // end namespace Eigen ++ ++#endif // HIP_VECTOR_COMPATIBILITY_H From 361b9fd059b61ddfa54d9ed5abef04d137365ab7 Mon Sep 17 00:00:00 2001 From: Shanqing Cai Date: Thu, 19 Dec 2019 09:47:15 -0500 Subject: [PATCH 096/130] Move section on tf.debugging.enable_check_numerics to its own subsection --- RELEASE.md | 60 ++++++++++++++++++++++++++++-------------------------- 1 file changed, 31 insertions(+), 29 deletions(-) diff --git a/RELEASE.md b/RELEASE.md index 281f755c5c07f5..5dd596b7ed0387 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -10,7 +10,7 @@ TensorFlow 2.1 will be the last TF release supporting Python 2. Python 2 support * `Model.fit_generator`, `Model.evaluate_generator`, and `Model.predict_generator` are deprecated endpoints. They are subsumed by `Model.fit`, `Model.evaluate`, and `Model.predict` which now support generators and Sequences. * Keras `.compile` `.fit` `.evaluate` and `.predict` are allowed to be outside of the DistributionStrategy scope, as long as the model was constructed inside of a scope. * Keras `model.load_weights` now accepts `skip_mismatch` as an argument. This was available in external Keras, and has now been copied over to `tf.keras`. 
- * Introduced the `TextVectorization` layer, which takes as input raw strings and takes care of text standardization, tokenization, n-gram generation, and vocabulary indexing. See this [end-to-end text classification example](https://colab.research.google.com/drive/1RvCnR7h0_l4Ekn5vINWToI9TNJdpUZB3). + * Introduced the `TextVectorization` layer, which takes as input raw strings and takes care of text standardization, tokenization, n-gram generation, and vocabulary indexing. See this [end-to-end text classification example](https://colab.research.google.com/drive/1RvCnR7h0_l4Ekn5vINWToI9TNJdpUZB3). * Experimental support for Keras `.compile`, `.fit`, `.evaluate`, and `.predict` is available for Cloud TPU Pods. * Automatic outside compilation is now enabled for Cloud TPUs. This allows `tf.summary` to be used more conveniently with Cloud TPUs. * Dynamic batch sizes with DistributionStrategy and Keras are supported on Cloud TPUs. @@ -23,7 +23,7 @@ TensorFlow 2.1 will be the last TF release supporting Python 2. Python 2 support * Environment variable `TF_DETERMINISTIC_OPS` added. When set to "true" or "1", this environment variable makes `tf.nn.bias_add` operate deterministically (i.e. reproducibly) when XLA JIT compilation is *not* enabled. It also makes cuDNN convolution and max-pooling operate deterministically. This makes Keras Conv*D and MaxPool*D layers operate deterministically in both the forward and backward directions when running on a CUDA-enabled GPU. ## Known issues -Because of [issues with building on windows](https://github.com/tensorflow/tensorflow/issues/10521), we turned off eigen strong inlining for the Windows builds. Windows binaries are expected to be slightly slower until the build issues are resolved. +Because of [issues with building on windows](https://github.com/tensorflow/tensorflow/issues/10521), we turned off eigen strong inlining for the Windows builds. 
Windows binaries are expected to be slightly slower until the build issues are resolved. ## Breaking Changes * Deletes `Operation.traceback_with_start_lines` for which we know of no usages. @@ -40,7 +40,7 @@ Because of [issues with building on windows](https://github.com/tensorflow/tenso * Add `tf.data.experimental.dense_to_ragged_batch()`. * Extend `tf.data` parsing ops to support `RaggedTensors`. * `tf.distribute` - * Fix issue where GRU would crash or give incorrect output when a `tf.distribute.Strategy` was used. + * Fix issue where GRU would crash or give incorrect output when a `tf.distribute.Strategy` was used. * `tf.estimator` * Added option in `tf.estimator.CheckpointSaverHook` to not save the `GraphDef`. * Moving the checkpoint reader from swig to pybind11. @@ -50,11 +50,13 @@ Because of [issues with building on windows](https://github.com/tensorflow/tenso * Fix the incorrect stateful behavior of Keras convolutional layers. * `tf.lite` * Legalization for `NMS` ops in TFLite. - * add `narrow_range` and `axis` to `quantize_v2` and `dequantize` ops. + * add `narrow_range` and `axis` to `quantize_v2` and `dequantize` ops. * Added support for `FusedBatchNormV3` in converter. * Add an `errno`-like field to `NNAPI` delegate for detecting `NNAPI` errors for fallback behaviour. * Refactors `NNAPI` Delegate to support detailed reason why an operation is not accelerated. * Converts hardswish subgraphs into atomic ops. +* `tf.debugging` + * * Add `tf.debugging.enable_check_numerics()` and `tf.debugging.disable_check_numerics()` to facilitate debugging of numeric instability (issues involving infinities and `NaN`s). * Other * Add `RaggedTensor.merge_dims()`. * Added new `uniform_row_length` row-partitioning tensor to `RaggedTensor`. @@ -66,7 +68,7 @@ Because of [issues with building on windows](https://github.com/tensorflow/tenso * Add preliminary support for sparse CSR matrices. * Tensor equality with `None` now behaves as expected. 
* Make calls to `tf.function(f)()`, `tf.function(f).get_concrete_function` and `tf.function(f).get_initialization_function` thread-safe. - * Add `tf.debugging.enable_check_numerics()` and `tf.debugging.disable_check_numerics()` to facilitate debugging of numeric instability (`Infinity`s and `NaN`s) under eager mode and `tf.function`s. + (`Infinity`s and `NaN`s) under eager mode and `tf.function`s. * Extend `tf.identity` to work with CompositeTensors (such as SparseTensor) * Added more `dtypes` and zero-sized inputs to `Einsum` Op and improved its performance * Enable multi-worker `NCCL` `all-reduce` inside functions executing eagerly. @@ -94,7 +96,7 @@ This release contains contributions from many people at Google, as well as: 8bitmp3, Aaron Ma, AbdüLhamit Yilmaz, Abhai Kollara, aflc, Ag Ramesh, Albert Z. Guo, Alex Torres, amoitra, Andrii Prymostka, angeliand, Anshuman Tripathy, Anthony Barbier, Anton Kachatkou, Anubh-V, Anuja Jakhade, Artem Ryabov, autoih, Bairen Yi, Bas Aarts, Basit Ayantunde, Ben Barsdell, Bhavani Subramanian, Brett Koonce, candy.dc, Captain-Pool, caster, cathy, Chong Yan, Choong Yin Thong, Clayne Robison, Colle, Dan Ganea, David Norman, David Refaeli, dengziming, Diego Caballero, Divyanshu, djshen, Douman, Duncan Riach, EFanZh, Elena Zhelezina, Eric Schweitz, Evgenii Zheltonozhskii, Fei Hu, fo40225, Fred Reiss, Frederic Bastien, Fredrik Knutsson, fsx950223, fwcore, George Grzegorz Pawelczak, George Sterpu, Gian Marco Iodice, Giorgio Arena, giuros01, Gomathi Ramamurthy, Guozhong Zhuang, Haifeng Jin, Haoyu Wu, HarikrishnanBalagopal, HJYOO, Huang Chen-Yi, Ilham Firdausi Putra, Imran Salam, Jared Nielsen, Jason Zaman, Jasper Vicenti, Jeff Daily, Jeff Poznanovic, Jens Elofsson, Jerry Shih, jerryyin, Jesper Dramsch, jim.meyer, Jongwon Lee, Jun Wan, Junyuan Xie, Kaixi Hou, kamalkraj, Kan Chen, Karthik Muthuraman, Keiji Ariyama, Kevin Rose, Kevin Wang, Koan-Sin Tan, kstuedem, Kwabena W. 
Agyeman, Lakshay Tokas, latyas, Leslie-Fang-Intel, Li, Guizi, Luciano Resende, Lukas Folle, Lukas Geiger, Mahmoud Abuzaina, Manuel Freiberger, Mark Ryan, Martin Mlostek, Masaki Kozuki, Matthew Bentham, Matthew Denton, mbhuiyan, mdfaijul, Muhwan Kim, Nagy Mostafa, nammbash, Nathan Luehr, Nathan Wells, Niranjan Hasabnis, Oleksii Volkovskyi, Olivier Moindrot, olramde, Ouyang Jin, OverLordGoldDragon, Pallavi G, Paul Andrey, Paul Wais, pkanwar23, Pooya Davoodi, Prabindh Sundareson, Rajeshwar Reddy T, Ralovich, Kristof, Refraction-Ray, Richard Barnes, richardbrks, Robert Herbig, Romeo Kienzler, Ryan Mccormick, saishruthi, Saket Khandelwal, Sami Kama, Sana Damani, Satoshi Tanaka, Sergey Mironov, Sergii Khomenko, Shahid, Shawn Presser, ShengYang1, Siddhartha Bagaria, Simon Plovyt, skeydan, srinivasan.narayanamoorthy, Stephen Mugisha, sunway513, Takeshi Watanabe, Taylor Jakobson, TengLu, TheMindVirus, ThisIsIsaac, Tim Gates, Timothy Liu, Tomer Gafner, Trent Lo, Trevor Hickey, Trevor Morris, vcarpani, Wei Wang, Wen-Heng (Jack) Chung, wenshuai, Wenshuai-Xiaomi, wenxizhu, william, William D. Irons, Xinan Jiang, Yannic, Yasir Modak, Yasuhiro Matsumoto, Yong Tang, Yongfeng Gu, Youwei Song, Zaccharie Ramzi, Zhang, Zhenyu Guo, 王振华 (Zhenhua Wang), 韩董, 이중건 Isaac Lee # Release 1.15.0 -This is the last 1.x release for TensorFlow. We do not expect to update the 1.x branch with features, although we will issue patch releases to fix vulnerabilities for at least one year. +This is the last 1.x release for TensorFlow. We do not expect to update the 1.x branch with features, although we will issue patch releases to fix vulnerabilities for at least one year. ## Major Features and Improvements * As [announced](https://groups.google.com/a/tensorflow.org/forum/#!topic/developers/iRCt5m4qUz0), `tensorflow` pip package will by default include GPU support (same as `tensorflow-gpu` now) for the platforms we currently have GPU support (Linux and Windows). 
It will work on machines with and without Nvidia GPUs. `tensorflow-gpu` will still be available, and CPU-only packages can be downloaded at `tensorflow-cpu` for users who are concerned about package size. @@ -104,7 +106,7 @@ This enables writing forward compatible code: by explicitly importing either `te * Add toggles `tf.enable_control_flow_v2()` and `tf.disable_control_flow_v2()` for enabling/disabling v2 control flow. * Enable v2 control flow as part of `tf.enable_v2_behavior()` and `TF2_BEHAVIOR=1`. * AutoGraph translates Python control flow into TensorFlow expressions, allowing users to write regular Python inside `tf.function`-decorated functions. AutoGraph is also applied in functions used with `tf.data`, `tf.distribute` and `tf.keras` APIS. -* Adds `enable_tensor_equality()`, which switches the behavior such that: +* Adds `enable_tensor_equality()`, which switches the behavior such that: * Tensors are no longer hashable. * Tensors can be compared with `==` and `!=`, yielding a Boolean Tensor with element-wise comparison results. This will be the default behavior in 2.0. @@ -260,12 +262,12 @@ For information on upgrading your existing TensorFlow 1.x models, please refer t * TensorFlow 2.0.0 is built using devtoolset7 (GCC7) on Ubuntu 16. This may lead to ABI incompatibilities with extensions built against earlier versions of TensorFlow. * Tensorflow code now produces 2 different pip packages: tensorflow_core containing all the code (in the future it will contain only the private implementation) and tensorflow which is a virtual pip package doing forwarding to tensorflow_core (and in the future will contain only the public API of tensorflow). We don't expect this to be breaking, unless you were importing directly from the implementation. Removed the `freeze_graph` command line tool; `SavedModel` should be used in place of frozen graphs. 
- + * `tf.contrib`: * `tf.contrib` has been deprecated, and functionality has been either migrated to the core TensorFlow API, to an ecosystem project such as [tensorflow/addons](https://www.github.com/tensorflow/addons) or [tensorflow/io](https://www.github.com/tensorflow/io), or removed entirely. * Remove `tf.contrib.timeseries` dependency on TF distributions. * Replace contrib references with `tf.estimator.experimental.*` for apis in `early_stopping.py`. - + * `tf.estimator`: * Premade estimators in the tf.estimator.DNN/Linear/DNNLinearCombined family have been updated to use `tf.keras.optimizers` instead of the `tf.compat.v1.train.Optimizer`s. If you do not pass in an `optimizer=` arg or if you use a string, the premade estimator will use the Keras optimizer. This is checkpoint breaking, as the optimizers have separate variables. A checkpoint converter tool for converting optimizers is included with the release, but if you want to avoid any change, switch to the v1 version of the estimator: `tf.compat.v1.estimator.DNN/Linear/DNNLinearCombined*`. * Default aggregation for canned Estimators is now `SUM_OVER_BATCH_SIZE`. To maintain previous default behavior, please pass `SUM` as the loss aggregation method. @@ -273,13 +275,13 @@ For information on upgrading your existing TensorFlow 1.x models, please refer t * `Estimator.export_savedmodel` has been renamed to `export_saved_model`. * When saving to SavedModel, Estimators will strip default op attributes. This is almost always the correct behavior, as it is more forwards compatible, but if you require that default attributes to be saved with the model, please use `tf.compat.v1.Estimator`. * Feature Columns have been upgraded to be more Eager-friendly and to work with Keras. As a result, `tf.feature_column.input_layer` has been deprecated in favor of `tf.keras.layers.DenseFeatures`. v1 feature columns have direct analogues in v2 except for `shared_embedding_columns`, which are not cross-compatible with v1 and v2. 
Use `tf.feature_column.shared_embeddings` instead. - + * `tf.keras`: * `OMP_NUM_THREADS` is no longer used by the default Keras config. To configure the number of threads, use `tf.config.threading` APIs. * `tf.keras.model.save_model` and `model.save` now defaults to saving a TensorFlow SavedModel. HDF5 files are still supported. * Deprecated `tf.keras.experimental.export_saved_model` and `tf.keras.experimental.function`. Please use `tf.keras.models.save_model(..., save_format='tf')` and `tf.keras.models.load_model` instead. * Layers now default to float32, and automatically cast their inputs to the layer's dtype. If you had a model that used float64, it will probably silently use float32 in TensorFlow 2, and a warning will be issued that starts with `Layer ` is casting an input tensor from dtype float64 to the layer's dtype of float32. To fix, either set the default dtype to float64 with `tf.keras.backend.set_floatx('float64')`, or pass `dtype='float64'` to each of the Layer constructors. See `tf.keras.layers.Layer` for more information. - + * `tf.lite`: * Removed `lite.OpHint`, `lite.experimental`, and `lite.constant` from 2.0 API. * Tensors are no longer hashable, but instead compare element-wise with `==` and `!=`. Use `tf.compat.v1.disable_tensor_equality()` to return to the previous behavior. @@ -298,7 +300,7 @@ If you experience any snags when using TF 2.0, please let us know at the [TF 2.0 * `tf.contrib`: * Expose `tf.contrib.proto.*` ops in `tf.io` (they will exist in TF2) - + * `tf.data`: * Add support for TensorArrays to `tf.data Dataset`. * Integrate Ragged Tensors with `tf.data`. @@ -341,13 +343,13 @@ If you experience any snags when using TF 2.0, please let us know at the [TF 2.0 * Add support for passing list of lists to the `metrics` argument in Keras `compile`. * Add `tf.keras.layers.AbstractRNNCell` as the preferred implementation for RNN cells in TF v2. User can use it to implement RNN cells with custom behavior. 
* Keras training and validation curves are shown on the same plot when using the TensorBoard callback. - * Switched Keras `fit/evaluate/predict` execution to use only a single unified path by default unless eager execution has been explicitly disabled, regardless of input type. This unified path places an eager-friendly training step inside of a `tf.function`. With this + * Switched Keras `fit/evaluate/predict` execution to use only a single unified path by default unless eager execution has been explicitly disabled, regardless of input type. This unified path places an eager-friendly training step inside of a `tf.function`. With this 1. All input types are converted to `Dataset`. - 2. The path assumes there is always a distribution strategy. when distribution strategy is not specified the path uses a no-op distribution strategy. - 3. The training step is wrapped in `tf.function` unless `run_eagerly=True` is set in compile. The single path execution code does not yet support all use cases. We fallback to the existing v1 execution paths if your model contains the following: - 1. `sample_weight_mode` in compile - 2. `weighted_metrics` in compile - 3. v1 optimizer + 2. The path assumes there is always a distribution strategy. when distribution strategy is not specified the path uses a no-op distribution strategy. + 3. The training step is wrapped in `tf.function` unless `run_eagerly=True` is set in compile. The single path execution code does not yet support all use cases. We fallback to the existing v1 execution paths if your model contains the following: + 1. `sample_weight_mode` in compile + 2. `weighted_metrics` in compile + 3. v1 optimizer 4. target tensors in compile If you are experiencing any issues because of this change, please inform us (file an issue) about your use case and you can unblock yourself by setting `experimental_run_tf_function=False` in compile meanwhile. 
We have seen couple of use cases where the model usage pattern is not as expected and would not work with this change. 1. output tensors of one layer is used in the constructor of another. @@ -363,25 +365,25 @@ If you are experiencing any issues because of this change, please inform us (fil * Updates binary cross entropy logic in Keras when input is probabilities. Instead of converting probabilities to logits, we are using the cross entropy formula for probabilities. * Added public APIs for `cumsum` and `cumprod` keras backend functions. * Add support for temporal sample weight mode in subclassed models. - * Raise `ValueError` if an integer is passed to the training APIs. + * Raise `ValueError` if an integer is passed to the training APIs. * Added fault-tolerance support for training Keras model via `model.fit()` with `MultiWorkerMirroredStrategy`, tutorial available. * Custom Callback tutorial is now available. * To train with `tf.distribute`, Keras API is recommended over estimator. * `steps_per_epoch` and `steps` arguments are supported with numpy arrays. - * New error message when unexpected keys are used in sample_weight/class_weight dictionaries + * New error message when unexpected keys are used in sample_weight/class_weight dictionaries * Losses are scaled in Keras compile/fit and not in the optimizers anymore. If you are using custom training loop, we have new utilities to help scale losses `tf.nn.compute_average_loss`, `tf.nn.scale_regularization_loss`. * `Layer` apply and add_variable APIs are deprecated. * Added support for channels first data format in cross entropy losses with logits and support for tensors with unknown ranks. * Error messages will be raised if `add_update`, `add_metric`, `add_loss`, activity regularizers are used inside of a control flow branch. - * New loss reduction types: - 1. `AUTO`: Indicates that the reduction option will be determined by the usage context. For almost all cases this defaults to `SUM_OVER_BATCH_SIZE`. 
When used with `tf.distribute.Strategy`, outside of built-in training loops such as `tf.keras` `compile` and `fit`, we expect reduction value to be `SUM` or `NONE`. Using `AUTO` in that case will raise an error. - 2. `NONE`: Weighted losses with one dimension reduced (axis=-1, or axis specified by loss function). When this reduction type used with built-in Keras training loops like `fit`/`evaluate`, the unreduced vector loss is passed to the optimizer but the reported loss will be a scalar value. + * New loss reduction types: + 1. `AUTO`: Indicates that the reduction option will be determined by the usage context. For almost all cases this defaults to `SUM_OVER_BATCH_SIZE`. When used with `tf.distribute.Strategy`, outside of built-in training loops such as `tf.keras` `compile` and `fit`, we expect reduction value to be `SUM` or `NONE`. Using `AUTO` in that case will raise an error. + 2. `NONE`: Weighted losses with one dimension reduced (axis=-1, or axis specified by loss function). When this reduction type used with built-in Keras training loops like `fit`/`evaluate`, the unreduced vector loss is passed to the optimizer but the reported loss will be a scalar value. 3. `SUM`: Scalar sum of weighted losses. 4. `SUM_OVER_BATCH_SIZE`: Scalar `SUM` divided by number of elements in losses. This reduction type is not supported when used with `tf.distribute.Strategy` outside of built-in training loops like `tf.keras` `compile`/`fit`. * Wraps losses passed to the `compile` API (strings and v1 losses) which are not instances of v2 `Loss` class in `LossWrapper` class. => All losses will now use `SUM_OVER_BATCH_SIZE` reduction as default. * `model.add_loss(symbolic_tensor)` should work in ambient eager. - * Update metric name to always reflect what the user has given in compile. Affects following cases - 1. When name is given as 'accuracy'/'crossentropy' - 2. When an aliased function name is used eg. 
'mse' + * Update metric name to always reflect what the user has given in compile. Affects following cases + 1. When name is given as 'accuracy'/'crossentropy' + 2. When an aliased function name is used eg. 'mse' 3. Removing the `weighted` prefix from weighted metric names. * Allow non-Tensors through v2 losses. * Add v2 sparse categorical crossentropy metric. @@ -526,7 +528,7 @@ If you are experiencing any issues because of this change, please inform us (fil * Add support for `add_metric` in the graph function mode. * Updating cosine similarity loss - removed the negate sign from cosine similarity. * Changed default for gradient accumulation for TPU embeddings to true. - * Adds summary trace API for collecting graph and profile information. + * Adds summary trace API for collecting graph and profile information. * The `precision_mode` argument to `TrtGraphConverter` is now case insensitive. @@ -1305,7 +1307,7 @@ Ag Ramesh, Alex Wiltschko, Alexander Pantyukhin, Amogh Mannekote, An Jiaoyang, A * [`tf.contrib.estimator.RNNEstimator`](https://www.tensorflow.org/versions/r1.9/api_docs/python/tf/contrib/estimator/RNNClassifier) * The [distributions.Bijector](https://www.tensorflow.org/versions/r1.9/api_docs/python/tf/contrib/distributions/bijectors/Bijector) API supports broadcasting for Bijectors with new API changes. - + ## Breaking Changes * If you're opening empty variable scopes; replace `variable_scope('', ...)` by `variable_scope(tf.get_variable_scope(), ...)`. 
@@ -1784,7 +1786,7 @@ Samuel He, Sandeep Dcunha, sandipmgiri, Sang Han, scott, Scott Mudge, Se-Won Kim Simone Cirillo, Steffen Schmitz, Suvojit Manna, Sylvus, Taehoon Lee, Ted Chang, Thomas Deegan, Till Hoffmann, Tim, Toni Kunic, Toon Verstraelen, Tristan Rice, Urs KöSter, Utkarsh Upadhyay, Vish (Ishaya) Abrams, Winnie Tsang, Yan Chen, Yan Facai (颜发才), Yi Yang, Yong Tang, -Youssef Hesham, Yuan (Terry) Tang, Zhengsheng Wei, zxcqwe4906, 张志豪, 田传武 +Youssef Hesham, Yuan (Terry) Tang, Zhengsheng Wei, zxcqwe4906, 张志豪, 田传武 We are also grateful to all who filed issues or helped resolve them, asked and answered questions, and were part of inspiring discussions. From 62664e862bd16ef8aa2e4dc40f5d257d13993bec Mon Sep 17 00:00:00 2001 From: Shanqing Cai Date: Thu, 19 Dec 2019 09:50:39 -0500 Subject: [PATCH 097/130] Fix --- RELEASE.md | 61 +++++++++++++++++++++++++++--------------------------- 1 file changed, 30 insertions(+), 31 deletions(-) diff --git a/RELEASE.md b/RELEASE.md index 5dd596b7ed0387..57fbfe74e95bee 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -10,7 +10,7 @@ TensorFlow 2.1 will be the last TF release supporting Python 2. Python 2 support * `Model.fit_generator`, `Model.evaluate_generator`, and `Model.predict_generator` are deprecated endpoints. They are subsumed by `Model.fit`, `Model.evaluate`, and `Model.predict` which now support generators and Sequences. * Keras `.compile` `.fit` `.evaluate` and `.predict` are allowed to be outside of the DistributionStrategy scope, as long as the model was constructed inside of a scope. * Keras `model.load_weights` now accepts `skip_mismatch` as an argument. This was available in external Keras, and has now been copied over to `tf.keras`. - * Introduced the `TextVectorization` layer, which takes as input raw strings and takes care of text standardization, tokenization, n-gram generation, and vocabulary indexing. 
See this [end-to-end text classification example](https://colab.research.google.com/drive/1RvCnR7h0_l4Ekn5vINWToI9TNJdpUZB3). + * Introduced the `TextVectorization` layer, which takes as input raw strings and takes care of text standardization, tokenization, n-gram generation, and vocabulary indexing. See this [end-to-end text classification example](https://colab.research.google.com/drive/1RvCnR7h0_l4Ekn5vINWToI9TNJdpUZB3). * Experimental support for Keras `.compile`, `.fit`, `.evaluate`, and `.predict` is available for Cloud TPU Pods. * Automatic outside compilation is now enabled for Cloud TPUs. This allows `tf.summary` to be used more conveniently with Cloud TPUs. * Dynamic batch sizes with DistributionStrategy and Keras are supported on Cloud TPUs. @@ -18,12 +18,14 @@ TensorFlow 2.1 will be the last TF release supporting Python 2. Python 2 support * Keras reference implementations for many popular models are available in the TensorFlow [Model Garden](https://github.com/tensorflow/models/tree/master/official). * `tf.data` * Changes rebatching for `tf.data datasets` + distribution strategies for better performance. Note that the dataset also behaves slightly differently, in that the rebatched dataset cardinality will always be a multiple of the number of replicas. +* `tf.debugging` + * Add `tf.debugging.enable_check_numerics()` and `tf.debugging.disable_check_numerics()` to facilitate debugging the root causes of numeric instability (issues involving infinities and `NaN`s). * `TensorRT` * [TensorRT 6.0](https://developer.nvidia.com/tensorrt#tensorrt-whats-new) is now supported and enabled by default. This adds support for more TensorFlow ops including Conv3D, Conv3DBackpropInputV2, AvgPool3D, MaxPool3D, ResizeBilinear, and ResizeNearestNeighbor. In addition, the TensorFlow-TensorRT python conversion API is exported as `tf.experimental.tensorrt.Converter`. * Environment variable `TF_DETERMINISTIC_OPS` added. 
When set to "true" or "1", this environment variable makes `tf.nn.bias_add` operate deterministically (i.e. reproducibly) when XLA JIT compilation is *not* enabled. It also makes cuDNN convolution and max-pooling operate deterministically. This makes Keras Conv*D and MaxPool*D layers operate deterministically in both the forward and backward directions when running on a CUDA-enabled GPU. ## Known issues -Because of [issues with building on windows](https://github.com/tensorflow/tensorflow/issues/10521), we turned off eigen strong inlining for the Windows builds. Windows binaries are expected to be slightly slower until the build issues are resolved. +Because of [issues with building on windows](https://github.com/tensorflow/tensorflow/issues/10521), we turned off eigen strong inlining for the Windows builds. Windows binaries are expected to be slightly slower until the build issues are resolved. ## Breaking Changes * Deletes `Operation.traceback_with_start_lines` for which we know of no usages. @@ -40,7 +42,7 @@ Because of [issues with building on windows](https://github.com/tensorflow/tenso * Add `tf.data.experimental.dense_to_ragged_batch()`. * Extend `tf.data` parsing ops to support `RaggedTensors`. * `tf.distribute` - * Fix issue where GRU would crash or give incorrect output when a `tf.distribute.Strategy` was used. + * Fix issue where GRU would crash or give incorrect output when a `tf.distribute.Strategy` was used. * `tf.estimator` * Added option in `tf.estimator.CheckpointSaverHook` to not save the `GraphDef`. * Moving the checkpoint reader from swig to pybind11. @@ -50,13 +52,11 @@ Because of [issues with building on windows](https://github.com/tensorflow/tenso * Fix the incorrect stateful behavior of Keras convolutional layers. * `tf.lite` * Legalization for `NMS` ops in TFLite. - * add `narrow_range` and `axis` to `quantize_v2` and `dequantize` ops. + * add `narrow_range` and `axis` to `quantize_v2` and `dequantize` ops. 
* Added support for `FusedBatchNormV3` in converter. * Add an `errno`-like field to `NNAPI` delegate for detecting `NNAPI` errors for fallback behaviour. * Refactors `NNAPI` Delegate to support detailed reason why an operation is not accelerated. * Converts hardswish subgraphs into atomic ops. -* `tf.debugging` - * * Add `tf.debugging.enable_check_numerics()` and `tf.debugging.disable_check_numerics()` to facilitate debugging of numeric instability (issues involving infinities and `NaN`s). * Other * Add `RaggedTensor.merge_dims()`. * Added new `uniform_row_length` row-partitioning tensor to `RaggedTensor`. @@ -68,7 +68,6 @@ Because of [issues with building on windows](https://github.com/tensorflow/tenso * Add preliminary support for sparse CSR matrices. * Tensor equality with `None` now behaves as expected. * Make calls to `tf.function(f)()`, `tf.function(f).get_concrete_function` and `tf.function(f).get_initialization_function` thread-safe. - (`Infinity`s and `NaN`s) under eager mode and `tf.function`s. * Extend `tf.identity` to work with CompositeTensors (such as SparseTensor) * Added more `dtypes` and zero-sized inputs to `Einsum` Op and improved its performance * Enable multi-worker `NCCL` `all-reduce` inside functions executing eagerly. @@ -96,7 +95,7 @@ This release contains contributions from many people at Google, as well as: 8bitmp3, Aaron Ma, AbdüLhamit Yilmaz, Abhai Kollara, aflc, Ag Ramesh, Albert Z. 
Guo, Alex Torres, amoitra, Andrii Prymostka, angeliand, Anshuman Tripathy, Anthony Barbier, Anton Kachatkou, Anubh-V, Anuja Jakhade, Artem Ryabov, autoih, Bairen Yi, Bas Aarts, Basit Ayantunde, Ben Barsdell, Bhavani Subramanian, Brett Koonce, candy.dc, Captain-Pool, caster, cathy, Chong Yan, Choong Yin Thong, Clayne Robison, Colle, Dan Ganea, David Norman, David Refaeli, dengziming, Diego Caballero, Divyanshu, djshen, Douman, Duncan Riach, EFanZh, Elena Zhelezina, Eric Schweitz, Evgenii Zheltonozhskii, Fei Hu, fo40225, Fred Reiss, Frederic Bastien, Fredrik Knutsson, fsx950223, fwcore, George Grzegorz Pawelczak, George Sterpu, Gian Marco Iodice, Giorgio Arena, giuros01, Gomathi Ramamurthy, Guozhong Zhuang, Haifeng Jin, Haoyu Wu, HarikrishnanBalagopal, HJYOO, Huang Chen-Yi, Ilham Firdausi Putra, Imran Salam, Jared Nielsen, Jason Zaman, Jasper Vicenti, Jeff Daily, Jeff Poznanovic, Jens Elofsson, Jerry Shih, jerryyin, Jesper Dramsch, jim.meyer, Jongwon Lee, Jun Wan, Junyuan Xie, Kaixi Hou, kamalkraj, Kan Chen, Karthik Muthuraman, Keiji Ariyama, Kevin Rose, Kevin Wang, Koan-Sin Tan, kstuedem, Kwabena W. 
Agyeman, Lakshay Tokas, latyas, Leslie-Fang-Intel, Li, Guizi, Luciano Resende, Lukas Folle, Lukas Geiger, Mahmoud Abuzaina, Manuel Freiberger, Mark Ryan, Martin Mlostek, Masaki Kozuki, Matthew Bentham, Matthew Denton, mbhuiyan, mdfaijul, Muhwan Kim, Nagy Mostafa, nammbash, Nathan Luehr, Nathan Wells, Niranjan Hasabnis, Oleksii Volkovskyi, Olivier Moindrot, olramde, Ouyang Jin, OverLordGoldDragon, Pallavi G, Paul Andrey, Paul Wais, pkanwar23, Pooya Davoodi, Prabindh Sundareson, Rajeshwar Reddy T, Ralovich, Kristof, Refraction-Ray, Richard Barnes, richardbrks, Robert Herbig, Romeo Kienzler, Ryan Mccormick, saishruthi, Saket Khandelwal, Sami Kama, Sana Damani, Satoshi Tanaka, Sergey Mironov, Sergii Khomenko, Shahid, Shawn Presser, ShengYang1, Siddhartha Bagaria, Simon Plovyt, skeydan, srinivasan.narayanamoorthy, Stephen Mugisha, sunway513, Takeshi Watanabe, Taylor Jakobson, TengLu, TheMindVirus, ThisIsIsaac, Tim Gates, Timothy Liu, Tomer Gafner, Trent Lo, Trevor Hickey, Trevor Morris, vcarpani, Wei Wang, Wen-Heng (Jack) Chung, wenshuai, Wenshuai-Xiaomi, wenxizhu, william, William D. Irons, Xinan Jiang, Yannic, Yasir Modak, Yasuhiro Matsumoto, Yong Tang, Yongfeng Gu, Youwei Song, Zaccharie Ramzi, Zhang, Zhenyu Guo, 王振华 (Zhenhua Wang), 韩董, 이중건 Isaac Lee # Release 1.15.0 -This is the last 1.x release for TensorFlow. We do not expect to update the 1.x branch with features, although we will issue patch releases to fix vulnerabilities for at least one year. +This is the last 1.x release for TensorFlow. We do not expect to update the 1.x branch with features, although we will issue patch releases to fix vulnerabilities for at least one year. ## Major Features and Improvements * As [announced](https://groups.google.com/a/tensorflow.org/forum/#!topic/developers/iRCt5m4qUz0), `tensorflow` pip package will by default include GPU support (same as `tensorflow-gpu` now) for the platforms we currently have GPU support (Linux and Windows). 
It will work on machines with and without Nvidia GPUs. `tensorflow-gpu` will still be available, and CPU-only packages can be downloaded at `tensorflow-cpu` for users who are concerned about package size. @@ -106,7 +105,7 @@ This enables writing forward compatible code: by explicitly importing either `te * Add toggles `tf.enable_control_flow_v2()` and `tf.disable_control_flow_v2()` for enabling/disabling v2 control flow. * Enable v2 control flow as part of `tf.enable_v2_behavior()` and `TF2_BEHAVIOR=1`. * AutoGraph translates Python control flow into TensorFlow expressions, allowing users to write regular Python inside `tf.function`-decorated functions. AutoGraph is also applied in functions used with `tf.data`, `tf.distribute` and `tf.keras` APIS. -* Adds `enable_tensor_equality()`, which switches the behavior such that: +* Adds `enable_tensor_equality()`, which switches the behavior such that: * Tensors are no longer hashable. * Tensors can be compared with `==` and `!=`, yielding a Boolean Tensor with element-wise comparison results. This will be the default behavior in 2.0. @@ -262,12 +261,12 @@ For information on upgrading your existing TensorFlow 1.x models, please refer t * TensorFlow 2.0.0 is built using devtoolset7 (GCC7) on Ubuntu 16. This may lead to ABI incompatibilities with extensions built against earlier versions of TensorFlow. * Tensorflow code now produces 2 different pip packages: tensorflow_core containing all the code (in the future it will contain only the private implementation) and tensorflow which is a virtual pip package doing forwarding to tensorflow_core (and in the future will contain only the public API of tensorflow). We don't expect this to be breaking, unless you were importing directly from the implementation. Removed the `freeze_graph` command line tool; `SavedModel` should be used in place of frozen graphs. 
- + * `tf.contrib`: * `tf.contrib` has been deprecated, and functionality has been either migrated to the core TensorFlow API, to an ecosystem project such as [tensorflow/addons](https://www.github.com/tensorflow/addons) or [tensorflow/io](https://www.github.com/tensorflow/io), or removed entirely. * Remove `tf.contrib.timeseries` dependency on TF distributions. * Replace contrib references with `tf.estimator.experimental.*` for apis in `early_stopping.py`. - + * `tf.estimator`: * Premade estimators in the tf.estimator.DNN/Linear/DNNLinearCombined family have been updated to use `tf.keras.optimizers` instead of the `tf.compat.v1.train.Optimizer`s. If you do not pass in an `optimizer=` arg or if you use a string, the premade estimator will use the Keras optimizer. This is checkpoint breaking, as the optimizers have separate variables. A checkpoint converter tool for converting optimizers is included with the release, but if you want to avoid any change, switch to the v1 version of the estimator: `tf.compat.v1.estimator.DNN/Linear/DNNLinearCombined*`. * Default aggregation for canned Estimators is now `SUM_OVER_BATCH_SIZE`. To maintain previous default behavior, please pass `SUM` as the loss aggregation method. @@ -275,13 +274,13 @@ For information on upgrading your existing TensorFlow 1.x models, please refer t * `Estimator.export_savedmodel` has been renamed to `export_saved_model`. * When saving to SavedModel, Estimators will strip default op attributes. This is almost always the correct behavior, as it is more forwards compatible, but if you require that default attributes to be saved with the model, please use `tf.compat.v1.Estimator`. * Feature Columns have been upgraded to be more Eager-friendly and to work with Keras. As a result, `tf.feature_column.input_layer` has been deprecated in favor of `tf.keras.layers.DenseFeatures`. v1 feature columns have direct analogues in v2 except for `shared_embedding_columns`, which are not cross-compatible with v1 and v2. 
Use `tf.feature_column.shared_embeddings` instead. - + * `tf.keras`: * `OMP_NUM_THREADS` is no longer used by the default Keras config. To configure the number of threads, use `tf.config.threading` APIs. * `tf.keras.model.save_model` and `model.save` now defaults to saving a TensorFlow SavedModel. HDF5 files are still supported. * Deprecated `tf.keras.experimental.export_saved_model` and `tf.keras.experimental.function`. Please use `tf.keras.models.save_model(..., save_format='tf')` and `tf.keras.models.load_model` instead. * Layers now default to float32, and automatically cast their inputs to the layer's dtype. If you had a model that used float64, it will probably silently use float32 in TensorFlow 2, and a warning will be issued that starts with `Layer ` is casting an input tensor from dtype float64 to the layer's dtype of float32. To fix, either set the default dtype to float64 with `tf.keras.backend.set_floatx('float64')`, or pass `dtype='float64'` to each of the Layer constructors. See `tf.keras.layers.Layer` for more information. - + * `tf.lite`: * Removed `lite.OpHint`, `lite.experimental`, and `lite.constant` from 2.0 API. * Tensors are no longer hashable, but instead compare element-wise with `==` and `!=`. Use `tf.compat.v1.disable_tensor_equality()` to return to the previous behavior. @@ -300,7 +299,7 @@ If you experience any snags when using TF 2.0, please let us know at the [TF 2.0 * `tf.contrib`: * Expose `tf.contrib.proto.*` ops in `tf.io` (they will exist in TF2) - + * `tf.data`: * Add support for TensorArrays to `tf.data Dataset`. * Integrate Ragged Tensors with `tf.data`. @@ -343,13 +342,13 @@ If you experience any snags when using TF 2.0, please let us know at the [TF 2.0 * Add support for passing list of lists to the `metrics` argument in Keras `compile`. * Add `tf.keras.layers.AbstractRNNCell` as the preferred implementation for RNN cells in TF v2. User can use it to implement RNN cells with custom behavior. 
* Keras training and validation curves are shown on the same plot when using the TensorBoard callback. - * Switched Keras `fit/evaluate/predict` execution to use only a single unified path by default unless eager execution has been explicitly disabled, regardless of input type. This unified path places an eager-friendly training step inside of a `tf.function`. With this + * Switched Keras `fit/evaluate/predict` execution to use only a single unified path by default unless eager execution has been explicitly disabled, regardless of input type. This unified path places an eager-friendly training step inside of a `tf.function`. With this 1. All input types are converted to `Dataset`. - 2. The path assumes there is always a distribution strategy. when distribution strategy is not specified the path uses a no-op distribution strategy. - 3. The training step is wrapped in `tf.function` unless `run_eagerly=True` is set in compile. The single path execution code does not yet support all use cases. We fallback to the existing v1 execution paths if your model contains the following: - 1. `sample_weight_mode` in compile - 2. `weighted_metrics` in compile - 3. v1 optimizer + 2. The path assumes there is always a distribution strategy. when distribution strategy is not specified the path uses a no-op distribution strategy. + 3. The training step is wrapped in `tf.function` unless `run_eagerly=True` is set in compile. The single path execution code does not yet support all use cases. We fallback to the existing v1 execution paths if your model contains the following: + 1. `sample_weight_mode` in compile + 2. `weighted_metrics` in compile + 3. v1 optimizer 4. target tensors in compile If you are experiencing any issues because of this change, please inform us (file an issue) about your use case and you can unblock yourself by setting `experimental_run_tf_function=False` in compile meanwhile. 
We have seen couple of use cases where the model usage pattern is not as expected and would not work with this change. 1. output tensors of one layer is used in the constructor of another. @@ -365,25 +364,25 @@ If you are experiencing any issues because of this change, please inform us (fil * Updates binary cross entropy logic in Keras when input is probabilities. Instead of converting probabilities to logits, we are using the cross entropy formula for probabilities. * Added public APIs for `cumsum` and `cumprod` keras backend functions. * Add support for temporal sample weight mode in subclassed models. - * Raise `ValueError` if an integer is passed to the training APIs. + * Raise `ValueError` if an integer is passed to the training APIs. * Added fault-tolerance support for training Keras model via `model.fit()` with `MultiWorkerMirroredStrategy`, tutorial available. * Custom Callback tutorial is now available. * To train with `tf.distribute`, Keras API is recommended over estimator. * `steps_per_epoch` and `steps` arguments are supported with numpy arrays. - * New error message when unexpected keys are used in sample_weight/class_weight dictionaries + * New error message when unexpected keys are used in sample_weight/class_weight dictionaries * Losses are scaled in Keras compile/fit and not in the optimizers anymore. If you are using custom training loop, we have new utilities to help scale losses `tf.nn.compute_average_loss`, `tf.nn.scale_regularization_loss`. * `Layer` apply and add_variable APIs are deprecated. * Added support for channels first data format in cross entropy losses with logits and support for tensors with unknown ranks. * Error messages will be raised if `add_update`, `add_metric`, `add_loss`, activity regularizers are used inside of a control flow branch. - * New loss reduction types: - 1. `AUTO`: Indicates that the reduction option will be determined by the usage context. For almost all cases this defaults to `SUM_OVER_BATCH_SIZE`. 
When used with `tf.distribute.Strategy`, outside of built-in training loops such as `tf.keras` `compile` and `fit`, we expect reduction value to be `SUM` or `NONE`. Using `AUTO` in that case will raise an error. - 2. `NONE`: Weighted losses with one dimension reduced (axis=-1, or axis specified by loss function). When this reduction type used with built-in Keras training loops like `fit`/`evaluate`, the unreduced vector loss is passed to the optimizer but the reported loss will be a scalar value. + * New loss reduction types: + 1. `AUTO`: Indicates that the reduction option will be determined by the usage context. For almost all cases this defaults to `SUM_OVER_BATCH_SIZE`. When used with `tf.distribute.Strategy`, outside of built-in training loops such as `tf.keras` `compile` and `fit`, we expect reduction value to be `SUM` or `NONE`. Using `AUTO` in that case will raise an error. + 2. `NONE`: Weighted losses with one dimension reduced (axis=-1, or axis specified by loss function). When this reduction type used with built-in Keras training loops like `fit`/`evaluate`, the unreduced vector loss is passed to the optimizer but the reported loss will be a scalar value. 3. `SUM`: Scalar sum of weighted losses. 4. `SUM_OVER_BATCH_SIZE`: Scalar `SUM` divided by number of elements in losses. This reduction type is not supported when used with `tf.distribute.Strategy` outside of built-in training loops like `tf.keras` `compile`/`fit`. * Wraps losses passed to the `compile` API (strings and v1 losses) which are not instances of v2 `Loss` class in `LossWrapper` class. => All losses will now use `SUM_OVER_BATCH_SIZE` reduction as default. * `model.add_loss(symbolic_tensor)` should work in ambient eager. - * Update metric name to always reflect what the user has given in compile. Affects following cases - 1. When name is given as 'accuracy'/'crossentropy' - 2. When an aliased function name is used eg. 
'mse' + * Update metric name to always reflect what the user has given in compile. Affects following cases + 1. When name is given as 'accuracy'/'crossentropy' + 2. When an aliased function name is used eg. 'mse' 3. Removing the `weighted` prefix from weighted metric names. * Allow non-Tensors through v2 losses. * Add v2 sparse categorical crossentropy metric. @@ -528,7 +527,7 @@ If you are experiencing any issues because of this change, please inform us (fil * Add support for `add_metric` in the graph function mode. * Updating cosine similarity loss - removed the negate sign from cosine similarity. * Changed default for gradient accumulation for TPU embeddings to true. - * Adds summary trace API for collecting graph and profile information. + * Adds summary trace API for collecting graph and profile information. * The `precision_mode` argument to `TrtGraphConverter` is now case insensitive. @@ -1307,7 +1306,7 @@ Ag Ramesh, Alex Wiltschko, Alexander Pantyukhin, Amogh Mannekote, An Jiaoyang, A * [`tf.contrib.estimator.RNNEstimator`](https://www.tensorflow.org/versions/r1.9/api_docs/python/tf/contrib/estimator/RNNClassifier) * The [distributions.Bijector](https://www.tensorflow.org/versions/r1.9/api_docs/python/tf/contrib/distributions/bijectors/Bijector) API supports broadcasting for Bijectors with new API changes. - + ## Breaking Changes * If you're opening empty variable scopes; replace `variable_scope('', ...)` by `variable_scope(tf.get_variable_scope(), ...)`. 
@@ -1786,7 +1785,7 @@ Samuel He, Sandeep Dcunha, sandipmgiri, Sang Han, scott, Scott Mudge, Se-Won Kim Simone Cirillo, Steffen Schmitz, Suvojit Manna, Sylvus, Taehoon Lee, Ted Chang, Thomas Deegan, Till Hoffmann, Tim, Toni Kunic, Toon Verstraelen, Tristan Rice, Urs KöSter, Utkarsh Upadhyay, Vish (Ishaya) Abrams, Winnie Tsang, Yan Chen, Yan Facai (颜发才), Yi Yang, Yong Tang, -Youssef Hesham, Yuan (Terry) Tang, Zhengsheng Wei, zxcqwe4906, 张志豪, 田传武 +Youssef Hesham, Yuan (Terry) Tang, Zhengsheng Wei, zxcqwe4906, 张志豪, 田传武 We are also grateful to all who filed issues or helped resolve them, asked and answered questions, and were part of inspiring discussions. From a6fda7c38eaa93cfbeb773c8cd8d6a9e6e4bdb02 Mon Sep 17 00:00:00 2001 From: Shanqing Cai Date: Thu, 19 Dec 2019 09:52:04 -0500 Subject: [PATCH 098/130] Grammar tweak --- RELEASE.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/RELEASE.md b/RELEASE.md index 57fbfe74e95bee..6ff471f343274c 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -19,7 +19,7 @@ TensorFlow 2.1 will be the last TF release supporting Python 2. Python 2 support * `tf.data` * Changes rebatching for `tf.data datasets` + distribution strategies for better performance. Note that the dataset also behaves slightly differently, in that the rebatched dataset cardinality will always be a multiple of the number of replicas. * `tf.debugging` - * Add `tf.debugging.enable_check_numerics()` and `tf.debugging.disable_check_numerics()` to facilitate debugging the root causes of numeric instability (issues involving infinities and `NaN`s). + * Add `tf.debugging.enable_check_numerics()` and `tf.debugging.disable_check_numerics()` to help debugging the root causes of issues involving infinities and `NaN`s. * `TensorRT` * [TensorRT 6.0](https://developer.nvidia.com/tensorrt#tensorrt-whats-new) is now supported and enabled by default. 
This adds support for more TensorFlow ops including Conv3D, Conv3DBackpropInputV2, AvgPool3D, MaxPool3D, ResizeBilinear, and ResizeNearestNeighbor. In addition, the TensorFlow-TensorRT python conversion API is exported as `tf.experimental.tensorrt.Converter`. * Environment variable `TF_DETERMINISTIC_OPS` added. When set to "true" or "1", this environment variable makes `tf.nn.bias_add` operate deterministically (i.e. reproducibly) when XLA JIT compilation is *not* enabled. It also makes cuDNN convolution and max-pooling operate deterministically. This makes Keras Conv*D and MaxPool*D layers operate deterministically in both the forward and backward directions when running on a CUDA-enabled GPU. From 82e2d5a454137d9a96a57f4bef6f0e0ebabc8f03 Mon Sep 17 00:00:00 2001 From: Mihai Maruseac Date: Thu, 19 Dec 2019 08:53:06 -0800 Subject: [PATCH 099/130] Revert "Revert "[r2.1 cherry-pick] Fix pip package API generation"" --- tensorflow/api_template.__init__.py | 10 ++++++---- tensorflow/api_template_v1.__init__.py | 9 +++++---- .../python/tools/api/generator/create_python_api.py | 12 +++++++++--- .../tools/api/generator/create_python_api_test.py | 10 +++++----- tensorflow/virtual_root_template_v1.__init__.py | 3 --- tensorflow/virtual_root_template_v2.__init__.py | 10 ---------- 6 files changed, 25 insertions(+), 29 deletions(-) diff --git a/tensorflow/api_template.__init__.py b/tensorflow/api_template.__init__.py index 56d65d45faf0b1..c515cc76b9aacd 100644 --- a/tensorflow/api_template.__init__.py +++ b/tensorflow/api_template.__init__.py @@ -119,11 +119,11 @@ def _running_from_pip_package(): _current_file_location.startswith(dir_) for dir_ in _site_packages_dirs) if _running_from_pip_package(): - for s in _site_packages_dirs: + for _s in _site_packages_dirs: # TODO(gunan): Add sanity checks to loaded modules here. 
- plugin_dir = _os.path.join(s, 'tensorflow-plugins') - if _fi.file_exists(plugin_dir): - _ll.load_library(plugin_dir) + _plugin_dir = _os.path.join(_s, 'tensorflow-plugins') + if _fi.file_exists(_plugin_dir): + _ll.load_library(_plugin_dir) # Add module aliases if hasattr(_current_module, 'keras'): @@ -136,3 +136,5 @@ def _running_from_pip_package(): setattr(_current_module, "optimizers", optimizers) setattr(_current_module, "initializers", initializers) # pylint: enable=undefined-variable + +# __all__ PLACEHOLDER diff --git a/tensorflow/api_template_v1.__init__.py b/tensorflow/api_template_v1.__init__.py index 97478a18b8a20a..2b2899c3fe031e 100644 --- a/tensorflow/api_template_v1.__init__.py +++ b/tensorflow/api_template_v1.__init__.py @@ -132,9 +132,10 @@ def _running_from_pip_package(): _current_file_location.startswith(dir_) for dir_ in _site_packages_dirs) if _running_from_pip_package(): - for s in _site_packages_dirs: + for _s in _site_packages_dirs: # TODO(gunan): Add sanity checks to loaded modules here. - plugin_dir = _os.path.join(s, 'tensorflow-plugins') - if _fi.file_exists(plugin_dir): - _ll.load_library(plugin_dir) + _plugin_dir = _os.path.join(_s, 'tensorflow-plugins') + if _fi.file_exists(_plugin_dir): + _ll.load_library(_plugin_dir) +# __all__ PLACEHOLDER diff --git a/tensorflow/python/tools/api/generator/create_python_api.py b/tensorflow/python/tools/api/generator/create_python_api.py index 3af677322d67ee..80f663683c3ee0 100644 --- a/tensorflow/python/tools/api/generator/create_python_api.py +++ b/tensorflow/python/tools/api/generator/create_python_api.py @@ -243,11 +243,12 @@ def build(self): # from it using * import. Don't need this for lazy_loading because the # underscore symbols are already included in __all__ when passed in and # handled by TFModuleWrapper. 
+ root_module_footer = '' if not self._lazy_loading: underscore_names_str = ', '.join( '\'%s\'' % name for name in self._underscore_names_in_root) - module_text_map[''] = module_text_map.get('', '') + ''' + root_module_footer = ''' _names_with_underscore = [%s] __all__ = [_s for _s in dir() if not _s.startswith('_')] __all__.extend([_s for _s in _names_with_underscore]) @@ -273,7 +274,7 @@ def build(self): footer_text_map[dest_module] = _DEPRECATION_FOOTER % ( dest_module, public_apis_name, deprecation, has_lite) - return module_text_map, footer_text_map + return module_text_map, footer_text_map, root_module_footer def format_import(self, source_module_name, source_name, dest_name): """Formats import statement. @@ -620,7 +621,11 @@ def create_api_files(output_files, packages, root_init_template, output_dir, os.makedirs(os.path.dirname(file_path)) open(file_path, 'a').close() - module_text_map, deprecation_footer_map = get_api_init_text( + ( + module_text_map, + deprecation_footer_map, + root_module_footer, + ) = get_api_init_text( packages, output_package, api_name, api_version, compat_api_versions, lazy_loading, use_relative_imports) @@ -652,6 +657,7 @@ def create_api_files(output_files, packages, root_init_template, output_dir, with open(root_init_template, 'r') as root_init_template_file: contents = root_init_template_file.read() contents = contents.replace('# API IMPORTS PLACEHOLDER', text) + contents = contents.replace('# __all__ PLACEHOLDER', root_module_footer) elif module in compat_module_to_template: # Read base init file for compat module with open(compat_module_to_template[module], 'r') as init_template_file: diff --git a/tensorflow/python/tools/api/generator/create_python_api_test.py b/tensorflow/python/tools/api/generator/create_python_api_test.py index 010f189dcb27f0..76404d6c82b33b 100644 --- a/tensorflow/python/tools/api/generator/create_python_api_test.py +++ b/tensorflow/python/tools/api/generator/create_python_api_test.py @@ -62,7 +62,7 @@ def 
tearDown(self): del sys.modules[_MODULE_NAME] def testFunctionImportIsAdded(self): - imports, _ = create_python_api.get_api_init_text( + imports, _, _ = create_python_api.get_api_init_text( packages=[create_python_api._DEFAULT_PACKAGE], output_package='tensorflow', api_name='tensorflow', @@ -97,7 +97,7 @@ def testFunctionImportIsAdded(self): msg='compat.v1 in %s' % str(imports.keys())) def testClassImportIsAdded(self): - imports, _ = create_python_api.get_api_init_text( + imports, _, _ = create_python_api.get_api_init_text( packages=[create_python_api._DEFAULT_PACKAGE], output_package='tensorflow', api_name='tensorflow', @@ -116,7 +116,7 @@ def testClassImportIsAdded(self): msg='%s not in %s' % (expected_import, str(imports))) def testConstantIsAdded(self): - imports, _ = create_python_api.get_api_init_text( + imports, _, _ = create_python_api.get_api_init_text( packages=[create_python_api._DEFAULT_PACKAGE], output_package='tensorflow', api_name='tensorflow', @@ -132,7 +132,7 @@ def testConstantIsAdded(self): msg='%s not in %s' % (expected, str(imports))) def testCompatModuleIsAdded(self): - imports, _ = create_python_api.get_api_init_text( + imports, _, _ = create_python_api.get_api_init_text( packages=[create_python_api._DEFAULT_PACKAGE], output_package='tensorflow', api_name='tensorflow', @@ -144,7 +144,7 @@ def testCompatModuleIsAdded(self): msg='compat.v1.test not in %s' % str(imports.keys())) def testNestedCompatModulesAreAdded(self): - imports, _ = create_python_api.get_api_init_text( + imports, _, _ = create_python_api.get_api_init_text( packages=[create_python_api._DEFAULT_PACKAGE], output_package='tensorflow', api_name='tensorflow', diff --git a/tensorflow/virtual_root_template_v1.__init__.py b/tensorflow/virtual_root_template_v1.__init__.py index 236e9f52258973..9a45bc0355d0b7 100644 --- a/tensorflow/virtual_root_template_v1.__init__.py +++ b/tensorflow/virtual_root_template_v1.__init__.py @@ -132,7 +132,4 @@ def _forward_module(old_name): except 
NameError: pass -# Manually patch keras and estimator so tf.keras and tf.estimator work -keras = _sys.modules["tensorflow.keras"] -if not _root_estimator: estimator = _sys.modules["tensorflow.estimator"] # LINT.ThenChange(//tensorflow/virtual_root_template_v2.__init__.py.oss) diff --git a/tensorflow/virtual_root_template_v2.__init__.py b/tensorflow/virtual_root_template_v2.__init__.py index 83c020182a8ee9..bd8c903e455db5 100644 --- a/tensorflow/virtual_root_template_v2.__init__.py +++ b/tensorflow/virtual_root_template_v2.__init__.py @@ -126,14 +126,4 @@ def _forward_module(old_name): except NameError: pass -# TODO(mihaimaruseac): Revisit all of this once we release 2.1 -# Manually patch keras and estimator so tf.keras and tf.estimator work -keras = _sys.modules["tensorflow.keras"] -if not _root_estimator: estimator = _sys.modules["tensorflow.estimator"] -# Also import module aliases -try: - from tensorflow_core import losses, metrics, initializers, optimizers -except ImportError: - pass - # LINT.ThenChange(//tensorflow/virtual_root_template_v1.__init__.py.oss) From 06a52093ffbab7317c0f8fcc09531427b28c7fd7 Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Wed, 18 Dec 2019 19:43:07 -0800 Subject: [PATCH 100/130] Fixing an issue where scipy and tensorflow cannot be imported at the same time. 
PiperOrigin-RevId: 286313739 Change-Id: I9d4dfb66a8e82c56249bc7f7b70c71d228912e9f --- tensorflow/tensorflow.bzl | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index dfa7192ba84a32..2d61d1fa7a4ffe 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -2451,7 +2451,14 @@ def pybind_extension( name = so_file, srcs = srcs + hdrs, data = data, - copts = copts + ["-fexceptions"], + copts = copts + [ + "-fexceptions", + ] + select({ + clean_dep("//tensorflow:windows"): [], + "//conditions:default": [ + "-fvisibility=hidden", + ], + }), linkopts = linkopts + _rpath_linkopts(name) + select({ "@local_config_cuda//cuda:darwin": [ "-Wl,-exported_symbols_list,$(location %s)" % exported_symbols_file, From 58a9f2eb44d946443ce5450e4691a8487f68a683 Mon Sep 17 00:00:00 2001 From: TensorFlow Release Automation Date: Thu, 19 Dec 2019 09:09:46 -0800 Subject: [PATCH 101/130] Update version numbers to 2.1.0-rc2 --- tensorflow/core/public/version.h | 2 +- tensorflow/tools/pip_package/setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h index feeaec99e3f3fd..63c18c37da1f13 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -26,7 +26,7 @@ limitations under the License. // TF_VERSION_SUFFIX is non-empty for pre-releases (e.g. "-alpha", "-alpha.1", // "-beta", "-rc", "-rc.1") -#define TF_VERSION_SUFFIX "-rc1" +#define TF_VERSION_SUFFIX "-rc2" #define TF_STR_HELPER(x) #x #define TF_STR(x) TF_STR_HELPER(x) diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py index f4afd1ad352b90..79ab9b48618155 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -47,7 +47,7 @@ # result for pip. 
# Also update tensorflow/tensorflow.bzl and # tensorflow/core/public/version.h -_VERSION = '2.1.0-rc1' +_VERSION = '2.1.0-rc2' REQUIRED_PACKAGES = [ 'absl-py >= 0.7.0', From e98198702e0fcf4e61cbd4b18cb869e453c13dc7 Mon Sep 17 00:00:00 2001 From: Mihai Maruseac Date: Thu, 19 Dec 2019 10:22:28 -0800 Subject: [PATCH 102/130] Pin scipy to 1.4.1. Fixes segfaults caused by scipy/scipy#11237 before 1.4.1 (observed at scipy==1.4.0 and any version of TF and scipy==1.1.0 and TF==2.1.0rc1 on a specific VM setup) PiperOrigin-RevId: 286416747 Change-Id: I9f66f9145517d3b9279883a9292ae050b0dfa555 --- tensorflow/tools/ci_build/builds/pip_new.sh | 2 +- .../tools/ci_build/install/install_centos_pip_packages.sh | 4 ++-- tensorflow/tools/ci_build/install/install_pip_packages.sh | 4 ++-- .../tools/ci_build/install/install_python3.5_pip_packages.sh | 2 +- .../tools/ci_build/install/install_python3.6_pip_packages.sh | 2 +- tensorflow/tools/ci_build/release/common.sh | 2 +- tensorflow/tools/pip_package/setup.py | 2 ++ 7 files changed, 10 insertions(+), 8 deletions(-) diff --git a/tensorflow/tools/ci_build/builds/pip_new.sh b/tensorflow/tools/ci_build/builds/pip_new.sh index 2559dacd91551a..341177562f44ab 100755 --- a/tensorflow/tools/ci_build/builds/pip_new.sh +++ b/tensorflow/tools/ci_build/builds/pip_new.sh @@ -273,7 +273,7 @@ PYTHON_BIN_PATH_INIT=${PYTHON_BIN_PATH} PIP_BIN_PATH="$(which pip${PY_MAJOR_MINOR_VER})" # PIP packages -INSTALL_EXTRA_PIP_PACKAGES="portpicker scipy scikit-learn ${TF_BUILD_INSTALL_EXTRA_PIP_PACKAGES}" +INSTALL_EXTRA_PIP_PACKAGES="portpicker scipy==1.4.1 scikit-learn ${TF_BUILD_INSTALL_EXTRA_PIP_PACKAGES}" ########################################################################### # Build TF PIP Package diff --git a/tensorflow/tools/ci_build/install/install_centos_pip_packages.sh b/tensorflow/tools/ci_build/install/install_centos_pip_packages.sh index 7cd8d9f4418bd9..51c7a77079ac52 100755 --- a/tensorflow/tools/ci_build/install/install_centos_pip_packages.sh +++ 
b/tensorflow/tools/ci_build/install/install_centos_pip_packages.sh @@ -57,8 +57,8 @@ pip3 install --upgrade protobuf==3.6.1 pip2 install --upgrade numpy==1.14.5 pip3 install --upgrade numpy==1.14.5 -pip2 install scipy==1.1.0 -pip3 install scipy==1.1.0 +pip2 install scipy==1.2.2 +pip3 install scipy==1.4.1 pip2 install scikit-learn==0.18.1 pip3 install scikit-learn==0.18.1 diff --git a/tensorflow/tools/ci_build/install/install_pip_packages.sh b/tensorflow/tools/ci_build/install/install_pip_packages.sh index bb13c795284e81..170482b45657c7 100755 --- a/tensorflow/tools/ci_build/install/install_pip_packages.sh +++ b/tensorflow/tools/ci_build/install/install_pip_packages.sh @@ -76,8 +76,8 @@ else pip3 install --upgrade numpy==1.14.5 fi -pip2 install scipy==1.1.0 -pip3 install scipy==1.1.0 +pip2 install scipy==1.2.2 +pip3 install scipy==1.4.1 pip2 install scikit-learn==0.18.1 pip3 install scikit-learn==0.18.1 diff --git a/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh b/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh index 135e8e81addceb..e68b3b24477531 100755 --- a/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh +++ b/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh @@ -64,7 +64,7 @@ rm -rf /usr/lib/python3/dist-packages/six* # This workaround isn't needed for Ubuntu 16.04 or later. pip3.5 install --no-binary=:all: --upgrade numpy==1.14.5 -pip3.5 install scipy==0.18.1 +pip3.5 install scipy==1.4.1 pip3.5 install scikit-learn==0.19.1 diff --git a/tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh b/tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh index af62d9efc78e54..e3eaa843412482 100755 --- a/tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh +++ b/tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh @@ -76,7 +76,7 @@ rm -rf /usr/lib/python3/dist-packages/six* # This workaround isn't needed for Ubuntu 16.04 or later. 
pip3 install --no-binary=:all: --upgrade numpy==1.14.5 -pip3 install scipy==0.18.1 +pip3 install scipy==1.4.1 pip3 install scikit-learn==0.19.1 diff --git a/tensorflow/tools/ci_build/release/common.sh b/tensorflow/tools/ci_build/release/common.sh index 7b273fbfed1cc5..3b30fa6888d71c 100644 --- a/tensorflow/tools/ci_build/release/common.sh +++ b/tensorflow/tools/ci_build/release/common.sh @@ -215,7 +215,7 @@ function install_macos_pip_deps { ${SUDO_CMD} ${PIP_CMD} install --upgrade setuptools==39.1.0 ${SUDO_CMD} ${PIP_CMD} install keras_applications==1.0.8 --no-deps ${SUDO_CMD} ${PIP_CMD} install keras_preprocessing==1.1.0 --no-deps - ${SUDO_CMD} ${PIP_CMD} install --upgrade mock portpicker scipy grpcio + ${SUDO_CMD} ${PIP_CMD} install --upgrade mock portpicker scipy==1.4.1 grpcio ${SUDO_CMD} ${PIP_CMD} install six==1.12.0 ${SUDO_CMD} ${PIP_CMD} install scikit-learn==0.20.3 ${SUDO_CMD} ${PIP_CMD} install numpy==1.14.5 diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py index f4afd1ad352b90..faf613f22ac319 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -73,6 +73,8 @@ # functools comes with python3, need to install the backport for python2 'functools32 >= 3.2.3;python_version<"3"', 'six >= 1.12.0', + # scipy < 1.4.1 causes segfaults due to pybind11 + 'scipy == 1.4.1', ] if sys.byteorder == 'little': From ca2b3d0f9b7e0e5a5701f9175e63a320a3148d4d Mon Sep 17 00:00:00 2001 From: Mihai Maruseac Date: Thu, 19 Dec 2019 11:15:00 -0800 Subject: [PATCH 103/130] Don't pin on macos either --- tensorflow/tools/ci_build/release/common.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/tools/ci_build/release/common.sh b/tensorflow/tools/ci_build/release/common.sh index 3b30fa6888d71c..7b273fbfed1cc5 100644 --- a/tensorflow/tools/ci_build/release/common.sh +++ b/tensorflow/tools/ci_build/release/common.sh @@ -215,7 +215,7 @@ function install_macos_pip_deps { 
${SUDO_CMD} ${PIP_CMD} install --upgrade setuptools==39.1.0 ${SUDO_CMD} ${PIP_CMD} install keras_applications==1.0.8 --no-deps ${SUDO_CMD} ${PIP_CMD} install keras_preprocessing==1.1.0 --no-deps - ${SUDO_CMD} ${PIP_CMD} install --upgrade mock portpicker scipy==1.4.1 grpcio + ${SUDO_CMD} ${PIP_CMD} install --upgrade mock portpicker scipy grpcio ${SUDO_CMD} ${PIP_CMD} install six==1.12.0 ${SUDO_CMD} ${PIP_CMD} install scikit-learn==0.20.3 ${SUDO_CMD} ${PIP_CMD} install numpy==1.14.5 From 96a2a1108d9d3473febf71510bbbced79302eab1 Mon Sep 17 00:00:00 2001 From: Andrew Audibert Date: Wed, 18 Dec 2019 18:08:10 -0800 Subject: [PATCH 104/130] Fix doc formatting in dataset_ops.py The effect can be seen in the Args section of https://www.tensorflow.org/api_docs/python/tf/data/Dataset?version=nightly#shuffle PiperOrigin-RevId: 286304265 Change-Id: I318caf0b33a92d881ad42065d0e4a7a603d91fc0 --- tensorflow/python/data/ops/dataset_ops.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py index 50f9514de11be0..ea4c138ea90cee 100644 --- a/tensorflow/python/data/ops/dataset_ops.py +++ b/tensorflow/python/data/ops/dataset_ops.py @@ -1176,7 +1176,6 @@ def shuffle(self, buffer_size, seed=None, reshuffle_each_iteration=None): [1, 0, 2] >>> list(dataset.as_numpy_iterator()) # doctest: +SKIP [1, 0, 2] - ``` Args: buffer_size: A `tf.int64` scalar `tf.Tensor`, representing the number of From 9f0aef46924d79e156cd2a974baeb84662443517 Mon Sep 17 00:00:00 2001 From: Mihai Maruseac Date: Thu, 19 Dec 2019 14:04:49 -0800 Subject: [PATCH 105/130] Revert "- cherry-pick request: update tflite op versions" --- tensorflow/lite/toco/tflite/op_version.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/lite/toco/tflite/op_version.cc b/tensorflow/lite/toco/tflite/op_version.cc index a7a829e77e368c..39258339e0e27c 100644 --- a/tensorflow/lite/toco/tflite/op_version.cc +++ 
b/tensorflow/lite/toco/tflite/op_version.cc @@ -74,7 +74,7 @@ string GetMinimumRuntimeVersionForModel(const Model& model) { {{OperatorType::kCast, 1}, "1.5.0"}, {{OperatorType::kConcatenation, 1}, "1.5.0"}, {{OperatorType::kConcatenation, 2}, "1.14.0"}, - {{OperatorType::kDepthToSpace, 1}, "2.1.0"}, + {{OperatorType::kDepthToSpace, 1}, kPendingReleaseOpVersion}, {{OperatorType::kFakeQuant, 1}, "1.5.0"}, {{OperatorType::kFakeQuant, 2}, "1.10.0"}, {{OperatorType::kFullyConnected, 1}, "1.5.0"}, @@ -82,7 +82,7 @@ string GetMinimumRuntimeVersionForModel(const Model& model) { {{OperatorType::kFullyConnected, 3}, "1.14.0"}, {{OperatorType::kFullyConnected, 4}, "1.14.0"}, {{OperatorType::kFullyConnected, 5}, "2.0.0"}, - {{OperatorType::kFullyConnected, 6}, "2.1.0"}, + {{OperatorType::kFullyConnected, 6}, kPendingReleaseOpVersion}, {{OperatorType::kGather, 1}, "1.6.0"}, {{OperatorType::kGather, 2}, "1.14.0"}, {{OperatorType::kGather, 3}, "1.15.0"}, @@ -145,7 +145,7 @@ string GetMinimumRuntimeVersionForModel(const Model& model) { {{OperatorType::kSplitV, 1}, "1.13.1"}, {{OperatorType::kStridedSlice, 1}, "1.6.0"}, {{OperatorType::kStridedSlice, 2}, "1.14.0"}, - {{OperatorType::kStridedSlice, 3}, "2.1.0"}, + {{OperatorType::kStridedSlice, 3}, kPendingReleaseOpVersion}, {{OperatorType::kTopK_V2, 1}, "1.7.0"}, {{OperatorType::kTopK_V2, 2}, "1.14.0"}, {{OperatorType::kArgMax, 1}, "1.9.0"}, @@ -205,7 +205,7 @@ string GetMinimumRuntimeVersionForModel(const Model& model) { {{OperatorType::kElu, 1}, "1.14.0"}, {{OperatorType::kRound, 1}, "1.14.0"}, {{OperatorType::kRelu, 1}, "1.5.0"}, - {{OperatorType::kRelu, 2}, "2.1.0"}, + {{OperatorType::kRelu, 2}, kPendingReleaseOpVersion}, {{OperatorType::kRelu1, 1}, "1.5.0"}, {{OperatorType::kPRelu, 1}, "1.8.0"}, {{OperatorType::kExp, 1}, "1.7.0"}, From d49810b4e0cc1c51562d35727bd2a552d647a8a2 Mon Sep 17 00:00:00 2001 From: Mihai Maruseac Date: Thu, 19 Dec 2019 13:50:02 -0800 Subject: [PATCH 106/130] For python2, pin scipy to 1.2.2 (latest 
released). This means py2 won't get the fix in scipy/scipy#11237 PiperOrigin-RevId: 286456504 Change-Id: Ic94ee7e57dd6ea590d79aa643e5de4192709ff17 --- tensorflow/tools/pip_package/setup.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py index faf613f22ac319..0097ffc5c9ba0c 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -74,7 +74,9 @@ 'functools32 >= 3.2.3;python_version<"3"', 'six >= 1.12.0', # scipy < 1.4.1 causes segfaults due to pybind11 - 'scipy == 1.4.1', + # Latest scipy pip for py2 is scipy==1.2.2 + 'scipy == 1.4.1;python_version>="3"', + 'scipy == 1.2.2;python_version<"3"', ] if sys.byteorder == 'little': From 6728f21dc8758b2098382af671de2e300348577c Mon Sep 17 00:00:00 2001 From: Mihai Maruseac Date: Fri, 20 Dec 2019 09:00:43 -0800 Subject: [PATCH 107/130] Revert "Revert "- cherry-pick request: update tflite op versions"" --- tensorflow/lite/toco/tflite/op_version.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/lite/toco/tflite/op_version.cc b/tensorflow/lite/toco/tflite/op_version.cc index 39258339e0e27c..a7a829e77e368c 100644 --- a/tensorflow/lite/toco/tflite/op_version.cc +++ b/tensorflow/lite/toco/tflite/op_version.cc @@ -74,7 +74,7 @@ string GetMinimumRuntimeVersionForModel(const Model& model) { {{OperatorType::kCast, 1}, "1.5.0"}, {{OperatorType::kConcatenation, 1}, "1.5.0"}, {{OperatorType::kConcatenation, 2}, "1.14.0"}, - {{OperatorType::kDepthToSpace, 1}, kPendingReleaseOpVersion}, + {{OperatorType::kDepthToSpace, 1}, "2.1.0"}, {{OperatorType::kFakeQuant, 1}, "1.5.0"}, {{OperatorType::kFakeQuant, 2}, "1.10.0"}, {{OperatorType::kFullyConnected, 1}, "1.5.0"}, @@ -82,7 +82,7 @@ string GetMinimumRuntimeVersionForModel(const Model& model) { {{OperatorType::kFullyConnected, 3}, "1.14.0"}, {{OperatorType::kFullyConnected, 4}, "1.14.0"}, 
{{OperatorType::kFullyConnected, 5}, "2.0.0"}, - {{OperatorType::kFullyConnected, 6}, kPendingReleaseOpVersion}, + {{OperatorType::kFullyConnected, 6}, "2.1.0"}, {{OperatorType::kGather, 1}, "1.6.0"}, {{OperatorType::kGather, 2}, "1.14.0"}, {{OperatorType::kGather, 3}, "1.15.0"}, @@ -145,7 +145,7 @@ string GetMinimumRuntimeVersionForModel(const Model& model) { {{OperatorType::kSplitV, 1}, "1.13.1"}, {{OperatorType::kStridedSlice, 1}, "1.6.0"}, {{OperatorType::kStridedSlice, 2}, "1.14.0"}, - {{OperatorType::kStridedSlice, 3}, kPendingReleaseOpVersion}, + {{OperatorType::kStridedSlice, 3}, "2.1.0"}, {{OperatorType::kTopK_V2, 1}, "1.7.0"}, {{OperatorType::kTopK_V2, 2}, "1.14.0"}, {{OperatorType::kArgMax, 1}, "1.9.0"}, @@ -205,7 +205,7 @@ string GetMinimumRuntimeVersionForModel(const Model& model) { {{OperatorType::kElu, 1}, "1.14.0"}, {{OperatorType::kRound, 1}, "1.14.0"}, {{OperatorType::kRelu, 1}, "1.5.0"}, - {{OperatorType::kRelu, 2}, kPendingReleaseOpVersion}, + {{OperatorType::kRelu, 2}, "2.1.0"}, {{OperatorType::kRelu1, 1}, "1.5.0"}, {{OperatorType::kPRelu, 1}, "1.8.0"}, {{OperatorType::kExp, 1}, "1.7.0"}, From 3b9aa5b40b4408e2e68e53ff927f67391bd1e63c Mon Sep 17 00:00:00 2001 From: Mihai Maruseac Date: Fri, 20 Dec 2019 09:17:50 -0800 Subject: [PATCH 108/130] Remove pinning of scipy on . 
Same reasoning --- tensorflow/tools/ci_build/builds/pip_new.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/tools/ci_build/builds/pip_new.sh b/tensorflow/tools/ci_build/builds/pip_new.sh index 341177562f44ab..2559dacd91551a 100755 --- a/tensorflow/tools/ci_build/builds/pip_new.sh +++ b/tensorflow/tools/ci_build/builds/pip_new.sh @@ -273,7 +273,7 @@ PYTHON_BIN_PATH_INIT=${PYTHON_BIN_PATH} PIP_BIN_PATH="$(which pip${PY_MAJOR_MINOR_VER})" # PIP packages -INSTALL_EXTRA_PIP_PACKAGES="portpicker scipy==1.4.1 scikit-learn ${TF_BUILD_INSTALL_EXTRA_PIP_PACKAGES}" +INSTALL_EXTRA_PIP_PACKAGES="portpicker scipy scikit-learn ${TF_BUILD_INSTALL_EXTRA_PIP_PACKAGES}" ########################################################################### # Build TF PIP Package From 93a27b64bf40327181fdbba53a9fd8499b4ecf08 Mon Sep 17 00:00:00 2001 From: Mihai Maruseac Date: Fri, 20 Dec 2019 10:32:23 -0800 Subject: [PATCH 109/130] Revert "Set --incompatible_remove_legacy_whole_archive to False" --- .bazelrc | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/.bazelrc b/.bazelrc index 9aca8b4261e870..638bb39fafd66e 100644 --- a/.bazelrc +++ b/.bazelrc @@ -137,19 +137,8 @@ build --announce_rc # Other build flags. build --define=grpc_no_ares=true -# See https://github.com/bazelbuild/bazel/issues/7362 for information on what -# --incompatible_remove_legacy_whole_archive flag does. -# This flag is set to true in Bazel 1.0 and newer versions. We tried to migrate -# Tensorflow to the default, however test coverage wasn't enough to catch the -# errors. -# There is ongoing work on Bazel team's side to provide support for transitive -# shared libraries. As part of migrating to transitive shared libraries, we -# hope to provide a better mechanism for control over symbol exporting, and -# then tackle this issue again. 
-# -# TODO: Remove this line once TF doesn't depend on Bazel wrapping all library -# archives in -whole_archive -no_whole_archive. -build --noincompatible_remove_legacy_whole_archive +# Prevent regression of https://github.com/bazelbuild/bazel/issues/7362 +build --incompatible_remove_legacy_whole_archive # Modular TF build options build:dynamic_kernels --define=dynamic_loaded_kernels=true From 4bb06cece2607b0106c137dcc8b24ae81292ca0d Mon Sep 17 00:00:00 2001 From: Mihai Maruseac Date: Fri, 20 Dec 2019 11:35:16 -0800 Subject: [PATCH 110/130] Revert "Revert "Set --incompatible_remove_legacy_whole_archive to False"" --- .bazelrc | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/.bazelrc b/.bazelrc index 638bb39fafd66e..9aca8b4261e870 100644 --- a/.bazelrc +++ b/.bazelrc @@ -137,8 +137,19 @@ build --announce_rc # Other build flags. build --define=grpc_no_ares=true -# Prevent regression of https://github.com/bazelbuild/bazel/issues/7362 -build --incompatible_remove_legacy_whole_archive +# See https://github.com/bazelbuild/bazel/issues/7362 for information on what +# --incompatible_remove_legacy_whole_archive flag does. +# This flag is set to true in Bazel 1.0 and newer versions. We tried to migrate +# Tensorflow to the default, however test coverage wasn't enough to catch the +# errors. +# There is ongoing work on Bazel team's side to provide support for transitive +# shared libraries. As part of migrating to transitive shared libraries, we +# hope to provide a better mechanism for control over symbol exporting, and +# then tackle this issue again. +# +# TODO: Remove this line once TF doesn't depend on Bazel wrapping all library +# archives in -whole_archive -no_whole_archive. 
+build --noincompatible_remove_legacy_whole_archive # Modular TF build options build:dynamic_kernels --define=dynamic_loaded_kernels=true From c6170fb37c65556fda58a014d8a3235ad75f1cfc Mon Sep 17 00:00:00 2001 From: Mihai Maruseac Date: Fri, 20 Dec 2019 15:33:46 -0800 Subject: [PATCH 111/130] Fix segfault when attempting to convert string to float16. To make sure this gets fixed, add test for converting string to any numeric type. PiperOrigin-RevId: 286650886 Change-Id: I81f770ec2bbd33a863e8057ce198c679912fa8e0 --- tensorflow/python/BUILD | 11 ++++ .../python/framework/constant_op_test.py | 61 +++++++++++++++++++ tensorflow/python/lib/core/py_seq_tensor.cc | 35 +++++++---- 3 files changed, 95 insertions(+), 12 deletions(-) create mode 100644 tensorflow/python/framework/constant_op_test.py diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index bf8ad490dca464..9869bc9eaffbfc 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -1766,6 +1766,17 @@ py_library( ], ) +tf_py_test( + name = "framework_constant_op_test", + size = "small", + srcs = ["framework/constant_op_test.py"], + main = "framework/constant_op_test.py", + python_version = "PY3", + deps = [ + ":constant_op", + ], +) + tf_py_test( name = "framework_registry_test", size = "small", diff --git a/tensorflow/python/framework/constant_op_test.py b/tensorflow/python/framework/constant_op_test.py new file mode 100644 index 00000000000000..da0fb64fde6f2b --- /dev/null +++ b/tensorflow/python/framework/constant_op_test.py @@ -0,0 +1,61 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for tensorflow.python.framework.constant_op.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from absl.testing import parameterized + +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.platform import test + + +class ConstantOpTest(test.TestCase, parameterized.TestCase): + + @parameterized.parameters( + dtypes.bfloat16, + dtypes.complex128, + dtypes.complex64, + dtypes.double, + dtypes.float16, + dtypes.float32, + dtypes.float64, + dtypes.half, + dtypes.int16, + dtypes.int32, + dtypes.int64, + dtypes.int8, + dtypes.qint16, + dtypes.qint32, + dtypes.qint8, + dtypes.quint16, + dtypes.quint8, + dtypes.uint16, + dtypes.uint32, + dtypes.uint64, + dtypes.uint8, + ) + def test_convert_string_to_number(self, dtype): + with self.assertRaises(TypeError): + constant_op.constant("hello", dtype) + + +if __name__ == "__main__": + ops.enable_eager_execution() + test.main() diff --git a/tensorflow/python/lib/core/py_seq_tensor.cc b/tensorflow/python/lib/core/py_seq_tensor.cc index 29f9fe22131ed7..054377fa1f7b1e 100644 --- a/tensorflow/python/lib/core/py_seq_tensor.cc +++ b/tensorflow/python/lib/core/py_seq_tensor.cc @@ -21,6 +21,7 @@ limitations under the License. 
#include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/core/stringpiece.h" #include "tensorflow/core/lib/strings/str_util.h" +#include "tensorflow/core/platform/macros.h" #include "tensorflow/core/platform/types.h" #include "tensorflow/python/lib/core/numpy.h" #include "tensorflow/python/lib/core/py_util.h" @@ -373,6 +374,21 @@ typedef Converter Int32Converter; // Floating-point support +// Returns `true` if `out` overflows when converted from `as_double`. +template +static inline bool CheckForOverflow(double as_double, T* out) { + return (sizeof(T) < sizeof(double) && std::isinf(*out) && + std::isfinite(as_double)); +} + +// There is no `std::isinf` that takes `Eigen::half` as argument but Eigen +// provides `Eigen::half_impl::isinf` instead. +template <> +inline bool CheckForOverflow(double as_double, Eigen::half* out) { + return (sizeof(Eigen::half) < sizeof(double) && + Eigen::half_impl::isinf(*out) && std::isfinite(as_double)); +} + template static const char* ConvertOneFloat(PyObject* v, T* out) { if (PyErr_Occurred()) { @@ -382,20 +398,19 @@ static const char* ConvertOneFloat(PyObject* v, T* out) { const double as_double = PyFloat_AS_DOUBLE(v); *out = static_cast(as_double); // Check for overflow - if (TF_PREDICT_FALSE(sizeof(T) < sizeof(double) && std::isinf(*out) && - std::isfinite(as_double))) { + if (TF_PREDICT_FALSE(CheckForOverflow(as_double, out))) { return ErrorOutOfRangeDouble; } return nullptr; } #if PY_MAJOR_VERSION < 3 if (PyInt_Check(v)) { - *out = PyInt_AS_LONG(v); + *out = static_cast(PyInt_AS_LONG(v)); return nullptr; } #endif if (PyLong_Check(v)) { - *out = PyLong_AsDouble(v); + *out = static_cast(PyLong_AsDouble(v)); if (PyErr_Occurred()) return ErrorOutOfRangeDouble; return nullptr; } @@ -444,13 +459,7 @@ struct ConverterTraits { static const tensorflow::DataType kTypeEnum = DT_HALF; static const char* ConvertScalar(PyObject* v, Eigen::half* out) { - // NOTE(nareshmodi): Is there a way to convert to C double without the 
- // intermediate Python double? This will help with ConvertOneFloat as well. - Safe_PyObjectPtr as_float = make_safe(PyNumber_Float(v)); - double v_double = PyFloat_AS_DOUBLE(as_float.get()); - *out = Eigen::half(v_double); - - return nullptr; + return ConvertOneFloat(v, out); } }; @@ -591,7 +600,9 @@ Status PySeqToTensor(PyObject* obj, DataType dtype, Tensor* ret) { break; case DT_HALF: - RETURN_STRING_AS_STATUS(NumpyHalfConverter::Convert(obj, shape, ret)); + if (NumpyHalfConverter::Convert(obj, &shape, ret) == nullptr) + return Status::OK(); + break; case DT_INT64: if (Int64Converter::Convert(obj, shape, ret) == nullptr) From 74d8b75028e4659110cbe9e1f76e394c3c7f0e14 Mon Sep 17 00:00:00 2001 From: Mihai Maruseac Date: Fri, 20 Dec 2019 16:04:39 -0800 Subject: [PATCH 112/130] Fix conflict where `additional_deps` should have been used instead of `deps`" --- tensorflow/python/BUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 9869bc9eaffbfc..399983b73d1a75 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -1772,7 +1772,7 @@ tf_py_test( srcs = ["framework/constant_op_test.py"], main = "framework/constant_op_test.py", python_version = "PY3", - deps = [ + additional_deps = [ ":constant_op", ], ) From db4d04782c079ac78dc8b8d978843051a18355ce Mon Sep 17 00:00:00 2001 From: Mihai Maruseac Date: Fri, 20 Dec 2019 16:26:04 -0800 Subject: [PATCH 113/130] Attempt to fix sanity, syntax rules are different on r2.1 --- tensorflow/python/BUILD | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 399983b73d1a75..f2ca67521f257c 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -1770,11 +1770,10 @@ tf_py_test( name = "framework_constant_op_test", size = "small", srcs = ["framework/constant_op_test.py"], - main = "framework/constant_op_test.py", - python_version = "PY3", additional_deps = [ 
":constant_op", ], + main = "framework/constant_op_test.py", ) tf_py_test( From bd74d92d10926e21f02df2aa0f918b9328325870 Mon Sep 17 00:00:00 2001 From: Mihai Maruseac Date: Fri, 20 Dec 2019 16:41:59 -0800 Subject: [PATCH 114/130] Typo from solving merge conflict --- tensorflow/python/lib/core/py_seq_tensor.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/lib/core/py_seq_tensor.cc b/tensorflow/python/lib/core/py_seq_tensor.cc index 054377fa1f7b1e..082625b43df42f 100644 --- a/tensorflow/python/lib/core/py_seq_tensor.cc +++ b/tensorflow/python/lib/core/py_seq_tensor.cc @@ -600,7 +600,7 @@ Status PySeqToTensor(PyObject* obj, DataType dtype, Tensor* ret) { break; case DT_HALF: - if (NumpyHalfConverter::Convert(obj, &shape, ret) == nullptr) + if (NumpyHalfConverter::Convert(obj, shape, ret) == nullptr) return Status::OK(); break; From 5dc7e36b8516a6560ea096b35ef09064a339c407 Mon Sep 17 00:00:00 2001 From: Goldie Gadde Date: Fri, 20 Dec 2019 17:07:24 -0800 Subject: [PATCH 115/130] Update RELEASE.md --- RELEASE.md | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/RELEASE.md b/RELEASE.md index a50ba6abdd3a94..1b9b68ecbbe91c 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -12,14 +12,20 @@ TensorFlow 2.1 will be the last TF release supporting Python 2. Python 2 support * Experimental support for mixed precision is available on GPUs and Cloud TPUs. See [usage guide](https://www.tensorflow.org/guide/keras/mixed_precision). * Introduced the `TextVectorization` layer, which takes as input raw strings and takes care of text standardization, tokenization, n-gram generation, and vocabulary indexing. See this [end-to-end text classification example](https://colab.research.google.com/drive/1RvCnR7h0_l4Ekn5vINWToI9TNJdpUZB3). * Keras `.compile` `.fit` `.evaluate` and `.predict` are allowed to be outside of the DistributionStrategy scope, as long as the model was constructed inside of a scope. 
- * Experimental support for Keras `.compile`, `.fit`, `.evaluate`, and `.predict` is available for Cloud TPU Pods. + * Experimental support for Keras `.compile`, `.fit`, `.evaluate`, and `.predict` is available for Cloud TPUs, Cloud TPU, for all types of Keras models (sequential, functional and subclassing models). * Automatic outside compilation is now enabled for Cloud TPUs. This allows `tf.summary` to be used more conveniently with Cloud TPUs. * Dynamic batch sizes with DistributionStrategy and Keras are supported on Cloud TPUs. + * Support for `.fit`, `.evaluate`, `.predict` on TPU using numpy data, in addition to `tf.data.Dataset`. * Keras reference implementations for many popular models are available in the TensorFlow [Model Garden](https://github.com/tensorflow/models/tree/master/official). * `tf.data` * Changes rebatching for `tf.data datasets` + DistributionStrategy for better performance. Note that the dataset also behaves slightly differently, in that the rebatched dataset cardinality will always be a multiple of the number of replicas. + * `tf.data.Dataset` now supports automatic data distribution and sharding in distributed environments, including on TPU pods. + * Distribution policies for `tf.data.Dataset` can now be tuned with 1. `data.experimental.AutoShardPolicy(OFF, AUTO, FILE, DATA)` 2. `data.experimental.ExternalStatePolicy(WARN, IGNORE, FAIL)` * `tf.debugging` * Add `tf.debugging.enable_check_numerics()` and `tf.debugging.disable_check_numerics()` to help debugging the root causes of issues involving infinities and `NaN`s. +* `tf.distribute` + * Custom training loop support on TPUs and TPU pods is avaiable through `strategy.experimental_distribute_dataset`, `strategy.experimental_run_v2`, `strategy.reduce`. + * Support for a global distribution strategy through `tf.distribute.experimental_set_strategy(),` in addition to `strategy.scope()`. 
* `TensorRT` * [TensorRT 6.0](https://developer.nvidia.com/tensorrt#tensorrt-whats-new) is now supported and enabled by default. This adds support for more TensorFlow ops including Conv3D, Conv3DBackpropInputV2, AvgPool3D, MaxPool3D, ResizeBilinear, and ResizeNearestNeighbor. In addition, the TensorFlow-TensorRT python conversion API is exported as `tf.experimental.tensorrt.Converter`. * Environment variable `TF_DETERMINISTIC_OPS` has been added. When set to "true" or "1", this environment variable makes `tf.nn.bias_add` operate deterministically (i.e. reproducibly), but currently only when XLA JIT compilation is *not* enabled. Setting `TF_DETERMINISTIC_OPS` to "true" or "1" also makes cuDNN convolution and max-pooling operate deterministically. This makes Keras Conv\*D and MaxPool\*D layers operate deterministically in both the forward and backward directions when running on a CUDA-enabled GPU. @@ -60,6 +66,8 @@ Because of [issues with building on windows](https://github.com/tensorflow/tenso * Refactors `NNAPI` Delegate to support detailed reason why an operation is not accelerated. * Converts hardswish subgraphs into atomic ops. * Other + * Critical stability updates for TPUs, especially in cases where the XLA compiler produces compilation errors. + * TPUs can now be re-initialized multiple times, using `tf.tpu.experimental.initialize_tpu_system`. * Add `RaggedTensor.merge_dims()`. * Added new `uniform_row_length` row-partitioning tensor to `RaggedTensor`. * Add `shape` arg to `RaggedTensor.to_tensor`; Improve speed of `RaggedTensor.to_tensor`. @@ -181,7 +189,7 @@ This enables writing forward compatible code: by explicitly importing either `te This release contains contributions from many people at Google, as well as: -a6802739, Aaron Ma, Abdullah Selek, Abolfazl Shahbazi, Ag Ramesh, Albert Z. 
Guo, Albin Joy, Alex Itkes, Alex Sergeev, Alexander Pivovarov, Alexey Romanov, alhkad, Amit Srivastava, amoitra, Andrew Lihonosov, Andrii Prymostka, Anuj Rawat, Astropeak, Ayush Agrawal, Bairen Yi, Bas Aarts, Bastian Eichenberger, Ben Barsdell, Benjamin Peterson, bhack, Bharat Raghunathan, Bhavani Subramanian, Bryan Cutler, candy.dc, Cao Zongyan, Captain-Pool, Casper Da Costa-Luis, Chen Guoyin, Cheng Chang, chengchingwen, Chong Yan, Choong Yin Thong, Christopher Yeh, Clayne Robison, Coady, Patrick, Dan Ganea, David Norman, Denis Khalikov, Deven Desai, Diego Caballero, Duncan Dean, Duncan Riach, Dwight J Lyle, Eamon Ito-Fisher, eashtian3, EFanZh, ejot, Elroy Ashtian Jr, Eric Schweitz, Fangjun Kuang, Fei Hu, fo40225, formath, Fred Reiss, Frederic Bastien, Fredrik Knutsson, G. Hussain Chinoy, Gabriel, gehring, George Grzegorz Pawelczak, Gianluca Varisco, Gleb Popov, Greg Peatfield, Guillaume Klein, Gurpreet Singh, Gustavo Lima Chaves, haison, Haraldur TóMas HallgríMsson, HarikrishnanBalagopal, HåKon Sandsmark, I-Hong, Ilham Firdausi Putra, Imran Salam, Jason Zaman, Jason Zavaglia, jayhpark530, jefby, Jeff Daily, Jeffrey Poznanovic, Jekyll Lai, Jeroen BéDorf, Jerry Shih, jerryyin, jiakai, JiangXIAO, Joe Bowser, Joel Shapiro, Johan Gunnarsson, Jojimon Varghese, Joon, Josh Beal, Julian Niedermeier, Jun Wan, Junqin Zhang, Junyuan Xie, Justin Tunis, Kaixi Hou, Karl Lessard, Karthik Muthuraman, Kbhute-Ibm, khanhlvg, Koock Yoon, kstuedem, Kyuwon Kim, Lakshay Tokas, leike666666, leonard951, Leslie-Fang, Leslie-Fang-Intel, Li, Guizi, Lukas Folle, Lukas Geiger, Mahmoud Abuzaina, Manraj Singh Grover, Margaret Maynard-Reid, Mark Ryan, Matt Conley, Matthew Bentham, Matthew Denton, mbhuiyan, mdfaijul, Mei Jie, merturl, MichaelKonobeev, Michal W. Tarnowski, minds, mpppk, musikisomorphie, Nagy Mostafa, Nayana Thorat, Neil, Niels Ole Salscheider, Niklas SilfverströM, Niranjan Hasabnis, ocjosen, olramde, Pariksheet Pinjari, Patrick J. 
Lopresti, Patrik Gustavsson, per1234, PeterLee, Phan Van Nguyen Duc, Phillip Kravtsov, Pooya Davoodi, Pranav Marathe, Putra Manggala, Qingqing Cao, Rajeshwar Reddy T, Ramon ViñAs, Rasmus Diederichsen, Reuben Morais, richardbrks, robert, RonLek, Ryan Jiang, saishruthi, Saket Khandelwal, Saleem Abdulrasool, Sami Kama, Sana-Damani, Sergii Khomenko, Severen Redwood, Shubham Goyal, Sigrid Keydana, Siju Samuel, sleighsoft, smilu97, Son Tran, Srini511, srinivasan.narayanamoorthy, Sumesh Udayakumaran, Sungmann Cho, Tae-Hwan Jung, Taehoon Lee, Takeshi Watanabe, TengLu, terryky, TheMindVirus, ThisIsIsaac, Till Hoffmann, Timothy Liu, Tomer Gafner, Tongxuan Liu, Trent Lo, Trevor Morris, Uday Bondhugula, Vasileios Lioutas, vbvg2008, Vishnuvardhan Janapati, Vivek Suryamurthy, Wei Wang, Wen-Heng (Jack) Chung, wenxizhu, William D. Irons, winstonq, wyzhao, Xiaoming (Jason) Cui, Xinan Jiang, Xinping Wang, Yann-Yy, Yasir Modak, Yong Tang, Yongfeng Gu, Yuchen Ying, Yuxin Wu, zyeric, 王振华 (Zhenhua Wang) +a6802739, Aaron Ma, Abdullah Selek, Abolfazl Shahbazi, Ag Ramesh, Albert Z. Guo, Albin Joy, Alex Itkes, Alex Sergeev, Alexander Pivovarov, Alexey Romanov, alhkad, Amit Srivastava, amoitra, Andrew Lihonosov, Andrii Prymostka, Anuj Rawat, Astropeak, Ayush Agrawal, Bairen Yi, Bas Aarts, Bastian Eichenberger, Ben Barsdell, Benjamin Peterson, bhack, Bharat Raghunathan, Bhavani Subramanian, Bryan Cutler, candy.dc, Cao Zongyan, Captain-Pool, Casper Da Costa-Luis, Chen Guoyin, Cheng Chang, chengchingwen, Chong Yan, Choong Yin Thong, Christopher Yeh, Clayne Robison, Coady, Patrick, Dan Ganea, David Norman, Denis Khalikov, Deven Desai, Diego Caballero, Duncan Dean, Duncan Riach, Dwight J Lyle, Eamon Ito-Fisher, eashtian3, EFanZh, ejot, Elroy Ashtian Jr, Eric Schweitz, Fangjun Kuang, Fei Hu, fo40225, formath, Fred Reiss, Frederic Bastien, Fredrik Knutsson, G. 
Hussain Chinoy, Gabriel, gehring, George Grzegorz Pawelczak, Gianluca Varisco, Gleb Popov, Greg Peatfield, Guillaume Klein, Gurpreet Singh, Gustavo Lima Chaves, haison, Haraldur TóMas HallgríMsson, HarikrishnanBalagopal, HåKon Sandsmark, I-Hong, Ilham Firdausi Putra, Imran Salam, Jason Zaman, Jason Zavaglia, jayhpark530, jefby, Jeff Daily, Jeffrey Poznanovic, Jekyll Lai, Jeroen BéDorf, Jerry Shih, jerryyin, jiakai, JiangXIAO, Joe Bowser, Joel Shapiro, Johan Gunnarsson, Jojimon Varghese, Joon, Josh Beal, Julian Niedermeier, Jun Wan, Junqin Zhang, Junyuan Xie, Justin Tunis, Kaixi Hou, Karl Lessard, Karthik Muthuraman, Kbhute-Ibm, khanhlvg, Koock Yoon, kstuedem, Kyuwon Kim, Lakshay Tokas, leike666666, leonard951, Leslie-Fang, Leslie-Fang-Intel, Li, Guizi, Lukas Folle, Lukas Geiger, Mahmoud Abuzaina, Manraj Singh Grover, Margaret Maynard-Reid, Mark Ryan, Matt Conley, Matthew Bentham, Matthew Denton, mbhuiyan, mdfaijul, Mei Jie, merturl, MichaelKonobeev, Michal W. Tarnowski, Milan Straka, minds, mpppk, musikisomorphie, Nagy Mostafa, Nayana Thorat, Neil, Niels Ole Salscheider, Niklas SilfverströM, Niranjan Hasabnis, ocjosen, olramde, Pariksheet Pinjari, Patrick J. 
Lopresti, Patrik Gustavsson, per1234, PeterLee, Phan Van Nguyen Duc, Phillip Kravtsov, Pooya Davoodi, Pranav Marathe, Putra Manggala, Qingqing Cao, Rajeshwar Reddy T, Ramon ViñAs, Rasmus Diederichsen, Reuben Morais, richardbrks, robert, RonLek, Ryan Jiang, saishruthi, Saket Khandelwal, Saleem Abdulrasool, Sami Kama, Sana-Damani, Sergii Khomenko, Severen Redwood, Shubham Goyal, Sigrid Keydana, Siju Samuel, sleighsoft, smilu97, Son Tran, Srini511, srinivasan.narayanamoorthy, Sumesh Udayakumaran, Sungmann Cho, Tae-Hwan Jung, Taehoon Lee, Takeshi Watanabe, TengLu, terryky, TheMindVirus, ThisIsIsaac, Till Hoffmann, Timothy Liu, Tomer Gafner, Tongxuan Liu, Trent Lo, Trevor Morris, Uday Bondhugula, Vasileios Lioutas, vbvg2008, Vishnuvardhan Janapati, Vivek Suryamurthy, Wei Wang, Wen-Heng (Jack) Chung, wenxizhu, William D. Irons, winstonq, wyzhao, Xiaoming (Jason) Cui, Xinan Jiang, Xinping Wang, Yann-Yy, Yasir Modak, Yong Tang, Yongfeng Gu, Yuchen Ying, Yuxin Wu, zyeric, 王振华 (Zhenhua Wang) # Release 2.0.0 From b8132f471b65d32e6016c974088a13da53be431b Mon Sep 17 00:00:00 2001 From: Goldie Gadde Date: Fri, 20 Dec 2019 17:18:30 -0800 Subject: [PATCH 116/130] Update RELEASE.md --- RELEASE.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/RELEASE.md b/RELEASE.md index 1b9b68ecbbe91c..91ccf2e507c5b6 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -20,11 +20,11 @@ TensorFlow 2.1 will be the last TF release supporting Python 2. Python 2 support * `tf.data` * Changes rebatching for `tf.data datasets` + DistributionStrategy for better performance. Note that the dataset also behaves slightly differently, in that the rebatched dataset cardinality will always be a multiple of the number of replicas. * `tf.data.Dataset` now supports automatic data distribution and sharding in distributed environments, including on TPU pods. - * Distribution policies for `tf.data.Dataset` can now be tuned with 1. `data.experimental.AutoShardPolicy(OFF, AUTO, FILE, DATA)` 2. 
`data.experimental.ExternalStatePolicy(WARN, IGNORE, FAIL)` + * Distribution policies for `tf.data.Dataset` can now be tuned with 1. `tf.data.experimental.AutoShardPolicy(OFF, AUTO, FILE, DATA)` 2. `tf.data.experimental.ExternalStatePolicy(WARN, IGNORE, FAIL)` * `tf.debugging` * Add `tf.debugging.enable_check_numerics()` and `tf.debugging.disable_check_numerics()` to help debugging the root causes of issues involving infinities and `NaN`s. * `tf.distribute` - * Custom training loop support on TPUs and TPU pods is avaiable through `strategy.experimental_distribute_dataset`, `strategy.experimental_run_v2`, `strategy.reduce`. + * Custom training loop support on TPUs and TPU pods is avaiable through `strategy.experimental_distribute_dataset`, `strategy.experimental_distribute_datasets_from_function`, `strategy.experimental_run_v2`, `strategy.reduce`. * Support for a global distribution strategy through `tf.distribute.experimental_set_strategy(),` in addition to `strategy.scope()`. * `TensorRT` * [TensorRT 6.0](https://developer.nvidia.com/tensorrt#tensorrt-whats-new) is now supported and enabled by default. This adds support for more TensorFlow ops including Conv3D, Conv3DBackpropInputV2, AvgPool3D, MaxPool3D, ResizeBilinear, and ResizeNearestNeighbor. In addition, the TensorFlow-TensorRT python conversion API is exported as `tf.experimental.tensorrt.Converter`. From eaa4090235470af72efa2922897db140ea009481 Mon Sep 17 00:00:00 2001 From: Mihai Maruseac Date: Mon, 23 Dec 2019 14:08:43 -0800 Subject: [PATCH 117/130] Fix bazelisk link --- RELEASE.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/RELEASE.md b/RELEASE.md index 91ccf2e507c5b6..3468c459f4242b 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -96,7 +96,7 @@ Because of [issues with building on windows](https://github.com/tensorflow/tenso * Use `_protogen` suffix for proto library targets instead of `_cc_protogen` suffix. * Moving the checkpoint reader from `swig` to `pybind11`. 
* `tf.device` & `MirroredStrategy` now supports passing in a `tf.config.LogicalDevice` - * If you're building Tensorflow from source, consider using [bazelisk](https://github.com/bazelisk/bazel) to automatically download and use the correct Bazel version. Bazelisk reads the `.bazelversion` file at the root of the project directory. + * If you're building Tensorflow from source, consider using [bazelisk](https://github.com/bazelbuild/bazelisk) to automatically download and use the correct Bazel version. Bazelisk reads the `.bazelversion` file at the root of the project directory. ## Thanks to our Contributors From f61b49891928eb7fe673dc8e771c4f77216b2aa9 Mon Sep 17 00:00:00 2001 From: Brian Zhao Date: Sat, 28 Dec 2019 18:49:35 -0800 Subject: [PATCH 118/130] Fixing .bazelversion to be the same version that CI uses for 2.1.0: https://github.com/tensorflow/tensorflow/blob/v2.1.0-rc2/tensorflow/tools/ci_build/install/install_bazel.sh#L18 --- .bazelversion | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.bazelversion b/.bazelversion index 30f6cf8d98ce36..25939d35c738f0 100644 --- a/.bazelversion +++ b/.bazelversion @@ -1 +1 @@ -0.26.1 +0.29.1 From 5a3c577ff3a6283736905ef5e2c5b1f7df683287 Mon Sep 17 00:00:00 2001 From: Mihai Maruseac Date: Thu, 2 Jan 2020 08:30:04 -0800 Subject: [PATCH 119/130] Cherry-pick the release build scripts --- .../ci_build/presubmit/macos/py2_cc/build.sh | 70 +++++++++++ .../presubmit/ubuntu_16/android/build.sh | 81 +++++++++++++ .../ubuntu_16/cpu_py36_full/build.sh | 96 +++++++++++++++ .../ubuntu_16/gpu_py36_full/build.sh | 114 ++++++++++++++++++ .../presubmit/ubuntu_16/sanity/build.sh | 86 +++++++++++++ .../presubmit/windows/cpu_py36_full/build.bat | 44 +++++++ .../presubmit/windows/gpu_py36_full/build.bat | 45 +++++++ 7 files changed, 536 insertions(+) create mode 100644 tensorflow/tools/ci_build/presubmit/macos/py2_cc/build.sh create mode 100644 tensorflow/tools/ci_build/presubmit/ubuntu_16/android/build.sh create mode 
100644 tensorflow/tools/ci_build/presubmit/ubuntu_16/cpu_py36_full/build.sh create mode 100644 tensorflow/tools/ci_build/presubmit/ubuntu_16/gpu_py36_full/build.sh create mode 100644 tensorflow/tools/ci_build/presubmit/ubuntu_16/sanity/build.sh create mode 100644 tensorflow/tools/ci_build/presubmit/windows/cpu_py36_full/build.bat create mode 100644 tensorflow/tools/ci_build/presubmit/windows/gpu_py36_full/build.bat diff --git a/tensorflow/tools/ci_build/presubmit/macos/py2_cc/build.sh b/tensorflow/tools/ci_build/presubmit/macos/py2_cc/build.sh new file mode 100644 index 00000000000000..92acb7ab7fe19b --- /dev/null +++ b/tensorflow/tools/ci_build/presubmit/macos/py2_cc/build.sh @@ -0,0 +1,70 @@ +#!/bin/bash +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +# TODO(mihaimaruseac,hyey,ggadde): Convert to py3 + +set -e + +# Error if we somehow forget to set the path to bazel_wrapper.py +set -u +BAZEL_WRAPPER_PATH=$1 +set +u + +# From this point on, logs can be publicly available +set -x + +function setup_pip () { + install_pip2 + python -m virtualenv tf_build_env --system-site-packages + source tf_build_env/bin/activate + install_macos_pip_deps +} + +function run_build () { + # Run configure. 
+ export TF_NEED_CUDA=0 + export PYTHON_BIN_PATH=$(which python2) + yes "" | $PYTHON_BIN_PATH configure.py + tag_filters="-no_oss,-no_oss_py2,-gpu,-tpu,-benchmark-test,-nomac,-no_mac,-v1only" + + # Get the default test targets for bazel. + source tensorflow/tools/ci_build/build_scripts/PRESUBMIT_BUILD_TARGETS.sh + + "${BAZEL_WRAPPER_PATH}" \ + test \ + --build_tag_filters="${tag_filters}" \ + --test_tag_filters="${tag_filters}" \ + --action_env=PATH \ + --remote_accept_cached=true \ + --spawn_strategy=standalone \ + --remote_local_fallback=false \ + --remote_timeout=600 \ + --strategy=Javac=standalone \ + --strategy=Closure=standalone \ + --genrule_strategy=standalone \ + -- ${DEFAULT_BAZEL_TARGETS} -//tensorflow/lite/... + + # Copy log to output to be available to GitHub + ls -la "$(bazel info output_base)/java.log" + cp "$(bazel info output_base)/java.log" "${KOKORO_ARTIFACTS_DIR}/" +} + +source tensorflow/tools/ci_build/release/common.sh +update_bazel_macos +which bazel +set_bazel_outdir + +setup_pip +run_build diff --git a/tensorflow/tools/ci_build/presubmit/ubuntu_16/android/build.sh b/tensorflow/tools/ci_build/presubmit/ubuntu_16/android/build.sh new file mode 100644 index 00000000000000..5fe3c41ae59a10 --- /dev/null +++ b/tensorflow/tools/ci_build/presubmit/ubuntu_16/android/build.sh @@ -0,0 +1,81 @@ +#!/bin/bash +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +set -e + +# Error if we somehow forget to set the path to bazel_wrapper.py +set -u +BAZEL_WRAPPER_PATH=$1 +set +u + +# From this point on, logs can be publicly available +set -x + +function run_build () { + export ANDROID_NDK_HOME="/opt/android-ndk-r17c" + export NDK_HOME=$ANDROID_NDK_HOME + export ANDROID_SDK_HOME="/opt/android-sdk/current" + export ANDROID_API_LEVEL="23" + export ANDROID_BUILD_TOOLS_VERSION="28.0.0" + + ANDROID_OUT=android.out + ANDROID_OUT_TARGET=gen_android_out + + # Run the presubmit android build. + tensorflow/tools/ci_build/builds/android.sh 2>&1 | tee tensorflow/tools/ci_build/builds/${ANDROID_OUT} + RC=${PIPESTATUS[0]} + + # Since we are running the build remotely (rbe), we need to build a bazel + # target that would output the log generated above and return the expected + # error code. + cat << EOF > tensorflow/tools/ci_build/builds/BUILD +package(default_visibility = ["//tensorflow:internal"]) + +sh_test( + name = "${ANDROID_OUT_TARGET}", + srcs = ["${ANDROID_OUT_TARGET}.sh"], + data = ["${ANDROID_OUT}"], + tags = ["local"], +) +EOF + + cat << EOF > tensorflow/tools/ci_build/builds/${ANDROID_OUT_TARGET}.sh +#!/bin/bash +cat tensorflow/tools/ci_build/builds/${ANDROID_OUT} +exit ${RC} +EOF + + # Now trigger the rbe build that outputs the log + chmod +x tensorflow/tools/ci_build/builds/${ANDROID_OUT_TARGET}.sh + + # Run bazel test command. Double test timeouts to avoid flakes. + # //tensorflow/core:platform_setround_test is not supported. 
See b/64264700 + "${BAZEL_WRAPPER_PATH}" \ + --host_jvm_args=-Dbazel.DigestFunction=SHA256 \ + test \ + --test_output=all \ + tensorflow/tools/ci_build/builds:${ANDROID_OUT_TARGET} + + # Copy log to output to be available to GitHub + ls -la "$(bazel info output_base)/java.log" + cp "$(bazel info output_base)/java.log" "${KOKORO_ARTIFACTS_DIR}/" +} + +source tensorflow/tools/ci_build/release/common.sh +update_bazel_linux +which bazel + +run_build diff --git a/tensorflow/tools/ci_build/presubmit/ubuntu_16/cpu_py36_full/build.sh b/tensorflow/tools/ci_build/presubmit/ubuntu_16/cpu_py36_full/build.sh new file mode 100644 index 00000000000000..d852ba3796f434 --- /dev/null +++ b/tensorflow/tools/ci_build/presubmit/ubuntu_16/cpu_py36_full/build.sh @@ -0,0 +1,96 @@ +#!/bin/bash +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +set -e + +# Error if we somehow forget to set the path to bazel_wrapper.py +set -u +BAZEL_WRAPPER_PATH=$1 +set +u + +# From this point on, logs can be publicly available +set -x + +function run_build () { + # Build a unique cache silo string. + UBUNTU_VERSION=$(lsb_release -a | grep Release | awk '{print $2}') + IMAGE_VERSION=$(cat /VERSION) + CACHE_SILO_VAL="cpu-py3-ubuntu-16-${UBUNTU_VERSION}-${IMAGE_VERSION}" + + # Run configure. 
+ # Do not run configure.py when doing remote build & test: + # Most things we set with configure.py are not used in a remote build setting, + # as the build will be defined by pre-configured build files that are checked + # in. + # TODO(klimek): Allow using the right set of bazel flags without the need to + # run configure.py; currently we need to carefully copy them, which is brittle. + export TF_NEED_GCP=0 + export TF_NEED_HDFS=0 + export TF_NEED_CUDA=0 + export ACTION_PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin" + export PYTHON_BIN_PATH="/usr/bin/python3" + export TF2_BEHAVIOR=1 + tag_filters="-no_oss,-oss_serial,-gpu,-tpu,-benchmark-test""$(maybe_skip_v1)" + + # Get the default test targets for bazel. + source tensorflow/tools/ci_build/build_scripts/PRESUBMIT_BUILD_TARGETS.sh + + # Run bazel test command. Double test timeouts to avoid flakes. + # //tensorflow/core:platform_setround_test is not supported. See b/64264700 + "${BAZEL_WRAPPER_PATH}" \ + test \ + --config=rbe \ + --python_path="${PYTHON_BIN_PATH}" \ + --action_env=PATH="${ACTION_PATH}" \ + --action_env=PYTHON_BIN_PATH="${PYTHON_BIN_PATH}" \ + --action_env=TF2_BEHAVIOR="${TF2_BEHAVIOR}" \ + --action_env=TF_PYTHON_CONFIG_REPO=@org_tensorflow//third_party/toolchains/preconfig/ubuntu16.04/py3 \ + --action_env=TF_ENABLE_XLA=1 \ + --test_tag_filters="${tag_filters}" \ + --build_tag_filters="${tag_filters}" \ + --test_lang_filters=cc,py \ + --define=with_default_optimizations=true \ + --define=framework_shared_object=true \ + --define=with_xla_support=true \ + -c opt \ + --copt="-w" \ + --copt=-mavx \ + --linkopt=-lrt \ + --distinct_host_configuration=false \ + --remote_default_platform_properties="properties:{name:\"build\" value:\"${CACHE_SILO_VAL}\"}" \ + --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010:toolchain \ + --host_javabase=@bazel_toolchains//configs/ubuntu16_04_clang/1.1:jdk8 \ + 
--javabase=@bazel_toolchains//configs/ubuntu16_04_clang/1.1:jdk8 \ + --host_java_toolchain=@bazel_tools//tools/jdk:toolchain_hostjdk8 \ + --java_toolchain=@bazel_tools//tools/jdk:toolchain_hostjdk8 \ + --extra_toolchains=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010:cc-toolchain-k8 \ + --extra_execution_platforms=@org_tensorflow//third_party/toolchains:rbe_ubuntu16.04-manylinux2010 \ + --host_platform=@org_tensorflow//third_party/toolchains:rbe_ubuntu16.04-manylinux2010 \ + --remote_timeout=3600 \ + --platforms=@org_tensorflow//third_party/toolchains:rbe_ubuntu16.04-manylinux2010 \ + -- \ + ${DEFAULT_BAZEL_TARGETS} -//tensorflow/lite/... + + # Copy log to output to be available to GitHub + ls -la "$(bazel info output_base)/java.log" + cp "$(bazel info output_base)/java.log" "${KOKORO_ARTIFACTS_DIR}/" +} + +source tensorflow/tools/ci_build/release/common.sh +update_bazel_linux +which bazel + +run_build diff --git a/tensorflow/tools/ci_build/presubmit/ubuntu_16/gpu_py36_full/build.sh b/tensorflow/tools/ci_build/presubmit/ubuntu_16/gpu_py36_full/build.sh new file mode 100644 index 00000000000000..3fa4d4f1d7da45 --- /dev/null +++ b/tensorflow/tools/ci_build/presubmit/ubuntu_16/gpu_py36_full/build.sh @@ -0,0 +1,114 @@ +#!/bin/bash +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +set -e + +# Error if we somehow forget to set the path to bazel_wrapper.py +set -u +BAZEL_WRAPPER_PATH=$1 +set +u + +# From this point on, logs can be publicly available +set -x + +function run_build () { + # Build a unique cache silo string. + UBUNTU_VERSION=$(lsb_release -a | grep Release | awk '{print $2}') + IMAGE_VERSION=$(cat /VERSION) + CACHE_SILO_VAL="gpu-py3-ubuntu-16-${UBUNTU_VERSION}-${IMAGE_VERSION}" + + # Run configure. + # Do not run configure.py when doing remote build & test: + # Most things we set with configure.py are not used in a remote build setting, + # as the build will be defined by pre-configured build files that are checked + # in. + # TODO(klimek): Allow using the right set of bazel flags without the need to + # run configure.py; currently we need to carefully copy them, which is brittle. + export LD_LIBRARY_PATH="/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64" + # TODO(klimek): Remove once we don't try to read it while setting up the remote + # config for cuda (we currently don't use it, as it's only used when compiling + # with clang, but we still require it to be set anyway). + export TF_CUDA_COMPUTE_CAPABILITIES=6.0 + export ACTION_PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin" + export PYTHON_BIN_PATH="/usr/bin/python3" + export TF2_BEHAVIOR=1 + tag_filters="gpu,-no_gpu,-nogpu,-benchmark-test,-no_oss,-oss_serial""$(maybe_skip_v1)" + + # Get the default test targets for bazel. + source tensorflow/tools/ci_build/build_scripts/PRESUBMIT_BUILD_TARGETS.sh + + # Run bazel test command. Double test timeouts to avoid flakes. + # //tensorflow/core:platform_setround_test is not supported. See b/64264700 + # TODO(klimek): Re-enable tensorrt tests (with different runtime image) once + # we can build them. 
+ # TODO(klimek): Stop using action_env for things that are only needed during + # setup - we're artificially poisoning the cache. + "${BAZEL_WRAPPER_PATH}" \ + test \ + --config=rbe \ + --python_path="${PYTHON_BIN_PATH}" \ + --action_env=PATH="${ACTION_PATH}" \ + --action_env=PYTHON_BIN_PATH="${PYTHON_BIN_PATH}" \ + --action_env=TF2_BEHAVIOR="${TF2_BEHAVIOR}" \ + --action_env=REMOTE_GPU_TESTING=1 \ + --action_env=TF_CUDA_COMPUTE_CAPABILITIES="${TF_CUDA_COMPUTE_CAPABILITIES}" \ + --action_env=TF_CUDA_CONFIG_REPO=@org_tensorflow//third_party/toolchains/preconfig/ubuntu16.04/cuda10.0-cudnn7 \ + --action_env=TF_CUDA_VERSION=10 \ + --action_env=TF_CUDNN_VERSION=7 \ + --action_env=TF_NEED_TENSORRT=0 \ + --action_env=TF_NEED_CUDA=1 \ + --action_env=TF_PYTHON_CONFIG_REPO=@org_tensorflow//third_party/toolchains/preconfig/ubuntu16.04/py3 \ + --test_env=LD_LIBRARY_PATH \ + --test_tag_filters="${tag_filters}" \ + --build_tag_filters="${tag_filters}" \ + --test_lang_filters=cc,py \ + --define=with_default_optimizations=true \ + --define=framework_shared_object=true \ + --define=with_xla_support=true \ + --define=using_cuda_nvcc=true \ + --define=use_fast_cpp_protos=true \ + --define=allow_oversize_protos=true \ + --define=grpc_no_ares=true \ + -c opt \ + --copt="-w" \ + --copt=-mavx \ + --linkopt=-lrt \ + --distinct_host_configuration=false \ + --remote_default_platform_properties="properties:{name:\"build\" value:\"${CACHE_SILO_VAL}\"}" \ + --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.0:toolchain \ + --host_javabase=@bazel_toolchains//configs/ubuntu16_04_clang/1.1:jdk8 \ + --javabase=@bazel_toolchains//configs/ubuntu16_04_clang/1.0:jdk8 \ + --host_java_toolchain=@bazel_tools//tools/jdk:toolchain_hostjdk8 \ + --java_toolchain=@bazel_tools//tools/jdk:toolchain_hostjdk8 \ + --extra_toolchains=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.0:toolchain-linux-x86_64 \ + 
--extra_execution_platforms=@org_tensorflow//third_party/toolchains:rbe_cuda10.0-cudnn7-ubuntu16.04-manylinux2010,@org_tensorflow//third_party/toolchains:rbe_cuda10.0-cudnn7-ubuntu16.04-manylinux2010-gpu \ + --host_platform=@org_tensorflow//third_party/toolchains:rbe_cuda10.0-cudnn7-ubuntu16.04-manylinux2010 \ + --local_test_jobs=4 \ + --remote_timeout=3600 \ + --platforms=@org_tensorflow//third_party/toolchains:rbe_cuda10.0-cudnn7-ubuntu16.04-manylinux2010 \ + -- \ + ${DEFAULT_BAZEL_TARGETS} -//tensorflow/lite/... + + # Copy log to output to be available to GitHub + ls -la "$(bazel info output_base)/java.log" + cp "$(bazel info output_base)/java.log" "${KOKORO_ARTIFACTS_DIR}/" +} + +source tensorflow/tools/ci_build/release/common.sh +update_bazel_linux +which bazel + +run_build diff --git a/tensorflow/tools/ci_build/presubmit/ubuntu_16/sanity/build.sh b/tensorflow/tools/ci_build/presubmit/ubuntu_16/sanity/build.sh new file mode 100644 index 00000000000000..250b0c1253d3ec --- /dev/null +++ b/tensorflow/tools/ci_build/presubmit/ubuntu_16/sanity/build.sh @@ -0,0 +1,86 @@ +#!/bin/bash +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +set -e + +# Error if we somehow forget to set the path to bazel_wrapper.py +set -u +BAZEL_WRAPPER_PATH=$1 +set +u + +# From this point on, logs can be publicly available +set -x + +function install_pylint () { + # TODO(gunan): figure out why we get stuck with later versions of pylint. + # TODO(mihaimaruseac): this is used in the release build in the same way, + # maybe extract out to a common? + sudo python2 -m pip install pylint==1.6.4 + sudo python3 -m pip install pylint==1.6.4 +} + +function run_sanity_checks () { + SANITY_OUT=ci_sanity.out + SANITY_OUT_TARGET=gen_ci_sanity_out + + # Run tensorflow sanity checks. + tensorflow/tools/ci_build/ci_sanity.sh 2>&1 | tee tensorflow/tools/ci_build/${SANITY_OUT} + RC=${PIPESTATUS[0]} + + # Since we are running the sanity build remotely (rbe), we need to build a bazel + # target that would output the log generated above and return the expected + # error code. + cat << EOF > tensorflow/tools/ci_build/BUILD +package(default_visibility = ["//tensorflow:internal"]) + +sh_test( + name = "${SANITY_OUT_TARGET}", + srcs = ["${SANITY_OUT_TARGET}.sh"], + data = ["${SANITY_OUT}"], + tags = ["local"], +) +EOF + + cat << EOF > tensorflow/tools/ci_build/${SANITY_OUT_TARGET}.sh +#!/bin/bash +cat tensorflow/tools/ci_build/${SANITY_OUT} +exit ${RC} +EOF + + # Now trigger the rbe build that outputs the log + chmod +x tensorflow/tools/ci_build/${SANITY_OUT_TARGET}.sh + + # Run bazel test command. Double test timeouts to avoid flakes. + # //tensorflow/core:platform_setround_test is not supported. 
See b/64264700 + "${BAZEL_WRAPPER_PATH}" \ + --host_jvm_args=-Dbazel.DigestFunction=SHA256 \ + test \ + --test_output=all \ + tensorflow/tools/ci_build:${SANITY_OUT_TARGET} + + # Copy log to output to be available to GitHub + ls -la "$(bazel info output_base)/java.log" + cp "$(bazel info output_base)/java.log" "${KOKORO_ARTIFACTS_DIR}/" +} + + +source tensorflow/tools/ci_build/release/common.sh +update_bazel_linux +which bazel + +install_pylint + +run_sanity_checks diff --git a/tensorflow/tools/ci_build/presubmit/windows/cpu_py36_full/build.bat b/tensorflow/tools/ci_build/presubmit/windows/cpu_py36_full/build.bat new file mode 100644 index 00000000000000..fcc079f7b0e010 --- /dev/null +++ b/tensorflow/tools/ci_build/presubmit/windows/cpu_py36_full/build.bat @@ -0,0 +1,44 @@ +echo on +setlocal enableextensions enabledelayedexpansion + +@REM This is the path to bazel_wrapper.py, should be set as an argument +set BAZEL_WRAPPER_PATH=%~f1 + +@REM Load common definitions, install bazel +CALL tensorflow\tools\ci_build\release\common_win.bat + +@REM Set up common variables used through the script +set WIN_OUT=win.out +set WIN_OUT_TARGET=gen_win_out +set BUILD_PATH=tensorflow/tools/ci_build/builds +set GEN_SCRIPT=%BUILD_PATH%/%WIN_OUT_TARGET%.sh +set GEN_BUILD=%BUILD_PATH%/BUILD + +@REM Run the presubmit win build. +CALL tensorflow\tools\ci_build\windows\cpu\pip\run.bat --enable_remote_cache %* > %BUILD_PATH%/%WIN_OUT% 2>&1 +set RC=%errorlevel% + +@REM Since we are running the sanity build remotely (rbe), we need to build a bazel +@REM target that would output the log generated above and return the expected +@REM error code. +echo package(default_visibility = ["//visibility:public"]) > %GEN_BUILD% +echo. 
>> %GEN_BUILD% +echo sh_test( >> %GEN_BUILD% +echo name = "%WIN_OUT_TARGET%", >> %GEN_BUILD% +echo srcs = ["%WIN_OUT_TARGET%.sh"], >> %GEN_BUILD% +echo data = ["%WIN_OUT%"], >> %GEN_BUILD% +echo tags = ["local"], >> %GEN_BUILD% +echo ) >> %GEN_BUILD% + +echo #!/bin/bash > %GEN_SCRIPT% +echo function rlocation() { >> %GEN_SCRIPT% +echo fgrep -m1 "$1 " "$RUNFILES_MANIFEST_FILE" ^| cut -d' ' -f2- >> %GEN_SCRIPT% +echo } >> %GEN_SCRIPT% +echo cat $(rlocation %BUILD_PATH%/%WIN_OUT%) >> %GEN_SCRIPT% +echo exit %RC% >> %GEN_SCRIPT% + +@REM Now trigger the rbe build that outputs the log +chmod +x %GEN_SCRIPT% + +@REM Run bazel test command. +%PY_EXE% %BAZEL_WRAPPER_PATH% --output_user_root=%TMPDIR% --host_jvm_args=-Dbazel.DigestFunction=SHA256 test %BUILD_PATH%:%WIN_OUT_TARGET% --test_output=all diff --git a/tensorflow/tools/ci_build/presubmit/windows/gpu_py36_full/build.bat b/tensorflow/tools/ci_build/presubmit/windows/gpu_py36_full/build.bat new file mode 100644 index 00000000000000..80edefc2bf050e --- /dev/null +++ b/tensorflow/tools/ci_build/presubmit/windows/gpu_py36_full/build.bat @@ -0,0 +1,45 @@ +echo on +setlocal enableextensions enabledelayedexpansion + +@REM This is the path to bazel_wrapper.py, should be set as an argument +set BAZEL_WRAPPER_PATH=%~f1 + +@REM Load common definitions, install bazel +CALL tensorflow\tools\ci_build\release\common_win.bat + +@REM Set up common variables used through the script +set WIN_OUT=win.out +set WIN_OUT_TARGET=gen_win_out +set BUILD_PATH=tensorflow/tools/ci_build/builds +set GEN_SCRIPT=%BUILD_PATH%/%WIN_OUT_TARGET%.sh +set GEN_BUILD=%BUILD_PATH%/BUILD + +@REM Run the presubmit win build. +CALL tensorflow\tools\ci_build\windows\gpu\pip\run.bat --enable_remote_cache %* > %BUILD_PATH%/%WIN_OUT% 2>&1 +set RC=%errorlevel% + +@REM Since we are running the sanity build remotely (rbe), we need to build a bazel +@REM target that would output the log generated above and return the expected +@REM error code. 
+echo package(default_visibility = ["//visibility:public"]) > %GEN_BUILD% +echo. >> %GEN_BUILD% +echo sh_test( >> %GEN_BUILD% +echo name = "%WIN_OUT_TARGET%", >> %GEN_BUILD% +echo srcs = ["%WIN_OUT_TARGET%.sh"], >> %GEN_BUILD% +echo data = ["%WIN_OUT%"], >> %GEN_BUILD% +echo tags = ["local"], >> %GEN_BUILD% +echo ) >> %GEN_BUILD% + +echo #!/bin/bash > %GEN_SCRIPT% +echo function rlocation() { >> %GEN_SCRIPT% +echo fgrep -m1 "$1 " "$RUNFILES_MANIFEST_FILE" ^| cut -d' ' -f2- >> %GEN_SCRIPT% +echo } >> %GEN_SCRIPT% +echo cat $(rlocation %BUILD_PATH%/%WIN_OUT%) >> %GEN_SCRIPT% +echo exit %RC% >> %GEN_SCRIPT% + +@REM Now trigger the rbe build that outputs the log +chmod +x %GEN_SCRIPT% + +@REM Run bazel test command. +%PY_EXE% %BAZEL_WRAPPER_PATH% --output_user_root=%TMPDIR% --host_jvm_args=-Dbazel.DigestFunction=SHA256 test %BUILD_PATH%:%WIN_OUT_TARGET% --test_output=all + From 2d5cbf1a2adad4c50195047f1a2aa3cfb6d5c5db Mon Sep 17 00:00:00 2001 From: Artem Belevich Date: Thu, 19 Dec 2019 13:02:01 -0800 Subject: [PATCH 120/130] Provide integer variants of isnan/isfinite to work around a quirk in MSVC standard library. PiperOrigin-RevId: 286447750 Change-Id: I5af8054ac69782e351cff440d88824eade90f963 --- tensorflow/compiler/xla/service/hlo_parser.cc | 23 +++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_parser.cc b/tensorflow/compiler/xla/service/hlo_parser.cc index ef58b37b469d7a..46fa47eaf1a970 100644 --- a/tensorflow/compiler/xla/service/hlo_parser.cc +++ b/tensorflow/compiler/xla/service/hlo_parser.cc @@ -2611,18 +2611,37 @@ struct MinMaxFiniteValue { static double min() { return -max(); } }; +// MSVC's standard C++ library does not define isnan/isfinite for integer types. +// To work around that we will need to provide our own. 
+template +std::enable_if_t::value, bool> IsFinite(T val) { + return std::isfinite(val); +} +template +std::enable_if_t::value, bool> IsNaN(T val) { + return std::isnan(val); +} +template +std::enable_if_t::value, bool> IsFinite(T val) { + return std::isfinite(static_cast(val)); +} +template +std::enable_if_t::value, bool> IsNaN(T val) { + return std::isnan(static_cast(val)); +} + template bool HloParserImpl::CheckParsedValueIsInRange(LocTy loc, ParsedElemT value) { if (std::is_floating_point::value) { auto value_as_native_t = static_cast(value); auto value_double_converted = static_cast(value_as_native_t); - if (!std::isfinite(value) || std::isfinite(value_double_converted)) { + if (!IsFinite(value) || IsFinite(value_double_converted)) { value = value_double_converted; } } PrimitiveType literal_ty = primitive_util::NativeToPrimitiveType(); - if (std::isnan(value) || + if (IsNaN(value) || (std::numeric_limits::has_infinity && (std::numeric_limits::infinity() == value || -std::numeric_limits::infinity() == value))) { From 4dfee0db430fb2b2bcf9a9d4e65dec38639af5ac Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 19 Dec 2019 11:59:48 -0800 Subject: [PATCH 121/130] Remove designated initializers. 
PiperOrigin-RevId: 286436468 Change-Id: I61c4337f54f3a2533a03de65371c2aa1b1a76e5b --- tensorflow/compiler/xla/client/lib/tridiagonal.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/compiler/xla/client/lib/tridiagonal.cc b/tensorflow/compiler/xla/client/lib/tridiagonal.cc index d2ea6d57069fc7..13cc3630137fd9 100644 --- a/tensorflow/compiler/xla/client/lib/tridiagonal.cc +++ b/tensorflow/compiler/xla/client/lib/tridiagonal.cc @@ -36,6 +36,8 @@ namespace { struct TridiagonalSystemShape { const int64 rank; const int64 num_equations; + TridiagonalSystemShape(int64 rk, int64 num_eqs) + : rank(rk), num_equations(num_eqs) {} }; Status CheckSecondToLastDimension(const Shape& op_shape, int64 rank, @@ -109,9 +111,7 @@ StatusOr CheckSystemAndReturnShape(XlaOp lower_diagonal, TF_RETURN_IF_ERROR(CheckSecondToLastDimension(upper_diagonal_shape, rank, 1, "upper diagonal")); - TridiagonalSystemShape result = {.rank = rank, - .num_equations = num_equations}; - return result; + return TridiagonalSystemShape(rank, num_equations); } XlaOp Coefficient(XlaOp operand, int64 i) { From 4c0721928b949ea67ea3f47650c7a65afe9611c5 Mon Sep 17 00:00:00 2001 From: Yunxing Dai Date: Wed, 18 Dec 2019 22:18:58 -0800 Subject: [PATCH 122/130] [XLA] Remove use of designated initializers in dynamic dimension inference. 
PiperOrigin-RevId: 286330416 Change-Id: I5878ab23b9596ac0c5dbf91394ee6ba8e5683c77 --- .../xla/service/dynamic_dimension_inference.cc | 14 +++++++------- .../xla/service/dynamic_dimension_inference.h | 3 +++ 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/tensorflow/compiler/xla/service/dynamic_dimension_inference.cc b/tensorflow/compiler/xla/service/dynamic_dimension_inference.cc index e93e234f3db538..d58c9ce0e47a0b 100644 --- a/tensorflow/compiler/xla/service/dynamic_dimension_inference.cc +++ b/tensorflow/compiler/xla/service/dynamic_dimension_inference.cc @@ -469,7 +469,7 @@ Status DynamicDimensionInferenceVisitor::HandleSetDimensionSize( // Propagate dynamic dimension indicated by this set dimension size // instruction. parent_->SetDynamicSize(hlo, {}, hlo->dimension(), hlo->mutable_operand(1), - {.stride = 1, .multiple_of = 1}); + DimensionConstraint(1, 1)); } // Also Propagate dynamic dimension already set by operands. @@ -813,7 +813,7 @@ Status DynamicDimensionInferenceVisitor::HandleReshape(HloInstruction* hlo) { parent_->SetDynamicSize( reshape, {}, output_dynamic_dimension, new_dynamic_size, - {.stride = 1, .multiple_of = constraint.multiple_of / divisor}); + DimensionConstraint(1, constraint.multiple_of / divisor)); } if (input_dim_size < output_dim_size) { @@ -850,12 +850,12 @@ Status DynamicDimensionInferenceVisitor::HandleReshape(HloInstruction* hlo) { hlo->parent()->AddInstruction(HloInstruction::CreateBinary( output_dynamic_size->shape(), HloOpcode::kMultiply, new_dynamic_size, operand_dynamic_size)); + int64 new_multiple_of_constraint = + constraint.multiple_of * output_dim_size / + operand->shape().dimensions(input_dynamic_dimension); parent_->SetDynamicSize( reshape, {}, output_dynamic_dimension, new_dynamic_size, - {.stride = 1, - .multiple_of = - constraint.multiple_of * output_dim_size / - operand->shape().dimensions(input_dynamic_dimension)}); + DimensionConstraint(1, new_multiple_of_constraint)); } return Status::OK(); @@ 
-1227,7 +1227,7 @@ Status DynamicDimensionInferenceVisitor::HandleParameter(HloInstruction* hlo) { parent_->SetDynamicSize(target_parameter, dynamic_dimension.parameter_index, dynamic_dimension.dimension, dynamic_size, - {.stride = 1, .multiple_of = 1}); + DimensionConstraint(1, 1)); return Status::OK(); }); } diff --git a/tensorflow/compiler/xla/service/dynamic_dimension_inference.h b/tensorflow/compiler/xla/service/dynamic_dimension_inference.h index 21808385ec21a8..070127796d6760 100644 --- a/tensorflow/compiler/xla/service/dynamic_dimension_inference.h +++ b/tensorflow/compiler/xla/service/dynamic_dimension_inference.h @@ -149,6 +149,9 @@ class DynamicDimensionInference { // // struct DimensionConstraint { + explicit DimensionConstraint(int64 s, int64 m) + : stride(s), multiple_of(m) {} + DimensionConstraint() : stride(1), multiple_of(1) {} // Stride represents the distance of a newly placed element and the previous // placed element on this dynamic dimension. int64 stride; From 9badef2f3f124508dfd0679f67851c27b9a7bcb8 Mon Sep 17 00:00:00 2001 From: Artem Belevich Date: Fri, 20 Dec 2019 14:37:39 -0800 Subject: [PATCH 123/130] Define _USE_MATH_DEFINES for windows builds. PiperOrigin-RevId: 286642121 Change-Id: I8ec228fd768df5b20a11cdc63fcf219d6ab24de7 --- .bazelrc | 4 ++++ tensorflow/cc/gradients/math_grad.cc | 1 - tensorflow/compiler/xla/client/lib/math.cc | 4 +--- tensorflow/core/lib/random/random_distributions.h | 3 --- tensorflow/core/lib/random/random_distributions_test.cc | 2 +- tensorflow/lite/experimental/microfrontend/lib/window_util.c | 2 -- tensorflow/tensorflow.bzl | 2 ++ 7 files changed, 8 insertions(+), 10 deletions(-) diff --git a/.bazelrc b/.bazelrc index 9aca8b4261e870..7beb93e42072ae 100644 --- a/.bazelrc +++ b/.bazelrc @@ -160,6 +160,10 @@ build:c++17 --cxxopt=-std=c++1z build:c++17 --cxxopt=-stdlib=libc++ build:c++1z --config=c++17 +# Tensorflow uses M_* math constants that only get defined by MSVC headers if +# _USE_MATH_DEFINES is defined. 
+build:windows --copt=/D_USE_MATH_DEFINES + # Default paths for TF_SYSTEM_LIBS build --define=PREFIX=/usr build --define=LIBDIR=$(PREFIX)/lib diff --git a/tensorflow/cc/gradients/math_grad.cc b/tensorflow/cc/gradients/math_grad.cc index b3c1e6a913a4fa..f67c6f91d6c9e7 100644 --- a/tensorflow/cc/gradients/math_grad.cc +++ b/tensorflow/cc/gradients/math_grad.cc @@ -13,7 +13,6 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#define _USE_MATH_DEFINES #include #include "tensorflow/cc/ops/array_ops_internal.h" diff --git a/tensorflow/compiler/xla/client/lib/math.cc b/tensorflow/compiler/xla/client/lib/math.cc index 989968b5cbc889..81c3a874f4ea4d 100644 --- a/tensorflow/compiler/xla/client/lib/math.cc +++ b/tensorflow/compiler/xla/client/lib/math.cc @@ -15,9 +15,7 @@ limitations under the License. #include "tensorflow/compiler/xla/client/lib/math.h" -// This macro is required to make MSVC defines math constants in math.h -#define _USE_MATH_DEFINES -#include +#include #include "tensorflow/compiler/xla/client/lib/arithmetic.h" #include "tensorflow/compiler/xla/client/lib/constants.h" diff --git a/tensorflow/core/lib/random/random_distributions.h b/tensorflow/core/lib/random/random_distributions.h index 6fb6babe7ec77a..6f40816aedb9e0 100644 --- a/tensorflow/core/lib/random/random_distributions.h +++ b/tensorflow/core/lib/random/random_distributions.h @@ -18,10 +18,7 @@ limitations under the License. #include -#define _USE_MATH_DEFINES -#include #include -#undef _USE_MATH_DEFINES #include #include diff --git a/tensorflow/core/lib/random/random_distributions_test.cc b/tensorflow/core/lib/random/random_distributions_test.cc index 8868672a10ae02..a49731601828cf 100644 --- a/tensorflow/core/lib/random/random_distributions_test.cc +++ b/tensorflow/core/lib/random/random_distributions_test.cc @@ -15,8 +15,8 @@ limitations under the License. 
#include "tensorflow/core/lib/random/random_distributions.h" -#include #include +#include #include #include #include diff --git a/tensorflow/lite/experimental/microfrontend/lib/window_util.c b/tensorflow/lite/experimental/microfrontend/lib/window_util.c index 3e544f5dd385e5..eee6e7b56ef340 100644 --- a/tensorflow/lite/experimental/microfrontend/lib/window_util.c +++ b/tensorflow/lite/experimental/microfrontend/lib/window_util.c @@ -14,8 +14,6 @@ limitations under the License. ==============================================================================*/ #include "tensorflow/lite/experimental/microfrontend/lib/window_util.h" -// This macro is required to make MSVC defines math constants in math.h -#define _USE_MATH_DEFINES #include #include #include diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index 048a6ebcd50a37..740f24ec4a4afe 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -263,6 +263,8 @@ def get_win_copts(is_external = False): # "/EHs-c-", "/wd4577", "/DNOGDI", + # Also see build:windows lines in tensorflow/opensource_only/.bazelrc + # where we set some other options globally. ] if is_external: return WINDOWS_COPTS + ["/UTF_COMPILE_LIBRARY"] From a8adad90ac581d99d2a1ab2517602fda7649d6cf Mon Sep 17 00:00:00 2001 From: Saleem Abdulrasool Date: Thu, 2 Jan 2020 16:24:57 -0800 Subject: [PATCH 124/130] configure.py: add `-D_USE_MATH_DEFINES` manually This adds `--copt -D_USE_MATH_DEFINES` manually to the .bazelrc as the version of bazel used on r2.1 (0.29.x) does not support platform specific options. This allows the windows build to function by default. 
--- configure.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.py b/configure.py index 530041ec0c4f31..8ec47294b47e88 100644 --- a/configure.py +++ b/configure.py @@ -1233,7 +1233,7 @@ def set_windows_build_flags(environ_cp): # Fix winsock2.h conflicts write_to_bazelrc( 'build --copt=-DWIN32_LEAN_AND_MEAN --host_copt=-DWIN32_LEAN_AND_MEAN ' - '--copt=-DNOGDI --host_copt=-DNOGDI') + '--copt=-DNOGDI --host_copt=-DNOGDI --copt=-D_USE_MATH_DEFINES') # Output more verbose information when something goes wrong write_to_bazelrc('build --verbose_failures') # The host and target platforms are the same in Windows build. So we don't From 5b7addfa22315b1d66d604b6d7ced5a324622397 Mon Sep 17 00:00:00 2001 From: Mihai Maruseac Date: Thu, 2 Jan 2020 12:00:56 -0800 Subject: [PATCH 125/130] Add Python3.7 testing on MacOS as we drop support for Python2. To be cherry-picked on `r1.15`, `r2.0`, and `r2.1` branches. PiperOrigin-RevId: 287871757 Change-Id: Ic530e884de421a39a82c686f1f0d086b6400d75c --- .../ci_build/presubmit/macos/py37_cc/build.sh | 68 +++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 tensorflow/tools/ci_build/presubmit/macos/py37_cc/build.sh diff --git a/tensorflow/tools/ci_build/presubmit/macos/py37_cc/build.sh b/tensorflow/tools/ci_build/presubmit/macos/py37_cc/build.sh new file mode 100644 index 00000000000000..ffc823a6e2eb56 --- /dev/null +++ b/tensorflow/tools/ci_build/presubmit/macos/py37_cc/build.sh @@ -0,0 +1,68 @@ +#!/bin/bash +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +set -e + +# Error if we somehow forget to set the path to bazel_wrapper.py +set -u +BAZEL_WRAPPER_PATH=$1 +set +u + +# From this point on, logs can be publicly available +set -x + +function setup_pip () { + python3.7 -m virtualenv tf_build_env --system-site-packages + source tf_build_env/bin/activate + install_macos_pip_deps +} + +function run_build () { + # Run configure. + export TF_NEED_CUDA=0 + export PYTHON_BIN_PATH=$(which python3.7) + yes "" | $PYTHON_BIN_PATH configure.py + tag_filters="-no_oss,-no_oss_py2,-gpu,-tpu,-benchmark-test,-nomac,-no_mac,-v1only" + + # Get the default test targets for bazel. + source tensorflow/tools/ci_build/build_scripts/PRESUBMIT_BUILD_TARGETS.sh + + "${BAZEL_WRAPPER_PATH}" \ + test \ + --build_tag_filters="${tag_filters}" \ + --test_tag_filters="${tag_filters}" \ + --action_env=PATH \ + --remote_accept_cached=true \ + --spawn_strategy=standalone \ + --remote_local_fallback=false \ + --remote_timeout=600 \ + --strategy=Javac=standalone \ + --strategy=Closure=standalone \ + --genrule_strategy=standalone \ + -- ${DEFAULT_BAZEL_TARGETS} -//tensorflow/lite/... 
+ + # Copy log to output to be available to GitHub + ls -la "$(bazel info output_base)/java.log" + cp "$(bazel info output_base)/java.log" "${KOKORO_ARTIFACTS_DIR}/" +} + +source tensorflow/tools/ci_build/release/common.sh +update_bazel_macos +which bazel +set_bazel_outdir + +setup_pip +run_build From 3ed1f0218eeddfbfecb021b2e9f585da860420f1 Mon Sep 17 00:00:00 2001 From: Mahmoud Abuzaina Date: Thu, 26 Dec 2019 10:29:27 -0800 Subject: [PATCH 126/130] Cherry-picking 55e20a6 - klockwork fix --- tensorflow/core/common_runtime/eager/mkl_eager_op_rewrite.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/common_runtime/eager/mkl_eager_op_rewrite.cc b/tensorflow/core/common_runtime/eager/mkl_eager_op_rewrite.cc index b9db52c023d4e7..42494a1f2e5745 100644 --- a/tensorflow/core/common_runtime/eager/mkl_eager_op_rewrite.cc +++ b/tensorflow/core/common_runtime/eager/mkl_eager_op_rewrite.cc @@ -87,7 +87,7 @@ REGISTER_REWRITE(EagerOpRewriteRegistry::PRE_EXECUTION, MklEagerOpRewrite); // Constructor MklEagerOpRewrite::MklEagerOpRewrite(string name, string file, string line) - : EagerOpRewrite(name, file, line) { + : EagerOpRewrite(name, file, line), registered_kernels_map_() { InsertMKLEagerOps({"BatchMatMul", AlwaysRewrite, CreateGenericMklOp}); InsertMKLEagerOps({"BatchMatMulV2", AlwaysRewrite, CreateGenericMklOp}); InsertMKLEagerOps({"Conv2D", RewriteConv2D, CreateMklConv2DOp}); From d43e3c70d5cf72a89b5b07df6253f3fe01514439 Mon Sep 17 00:00:00 2001 From: TensorFlow Release Automation Date: Mon, 6 Jan 2020 13:54:36 -0800 Subject: [PATCH 127/130] Update version numbers to 2.1.0 --- tensorflow/core/public/version.h | 2 +- tensorflow/tools/pip_package/setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h index 63c18c37da1f13..10d6b545b2a254 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -26,7 +26,7 @@ limitations 
under the License. // TF_VERSION_SUFFIX is non-empty for pre-releases (e.g. "-alpha", "-alpha.1", // "-beta", "-rc", "-rc.1") -#define TF_VERSION_SUFFIX "-rc2" +#define TF_VERSION_SUFFIX "" #define TF_STR_HELPER(x) #x #define TF_STR(x) TF_STR_HELPER(x) diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py index 1b5ff55e614a5e..c1cc78e3269300 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -47,7 +47,7 @@ # result for pip. # Also update tensorflow/tensorflow.bzl and # tensorflow/core/public/version.h -_VERSION = '2.1.0-rc2' +_VERSION = '2.1.0' REQUIRED_PACKAGES = [ 'absl-py >= 0.7.0', From 9f67747638b35c9e5e3aa914e17077bd09bb856e Mon Sep 17 00:00:00 2001 From: Alvin Portillo Date: Wed, 1 Apr 2020 14:08:00 -0700 Subject: [PATCH 128/130] Update c api visibility --- tensorflow/lite/experimental/c/BUILD | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tensorflow/lite/experimental/c/BUILD b/tensorflow/lite/experimental/c/BUILD index c9da03a070a88b..8f0c29eb755cbc 100644 --- a/tensorflow/lite/experimental/c/BUILD +++ b/tensorflow/lite/experimental/c/BUILD @@ -5,7 +5,7 @@ load( ) package( - default_visibility = [":experimental"], + default_visibility = ["//visibility:public"], licenses = ["notice"], # Apache 2.0 ) @@ -64,7 +64,7 @@ cc_library( ], copts = tflite_copts(), visibility = [ - ":experimental", + "//visibility:public", ], deps = [ ":c_api_internal", @@ -110,6 +110,9 @@ cc_test( size = "small", srcs = ["c_api_experimental_test.cc"], data = ["//tensorflow/lite:testdata/add.bin"], + visibility = [ + "//visibility:public", + ], deps = [ ":c_api", ":c_api_experimental", From 26dfffbedad7dadf7847952ee509c0e802f34ae7 Mon Sep 17 00:00:00 2001 From: erikmchut Date: Wed, 1 Apr 2020 17:02:32 -0700 Subject: [PATCH 129/130] Silence build warnings in tflite Silence build warnings in tflite --- tensorflow/lite/build_def.bzl | 3 +++ tensorflow/lite/delegates/nnapi/BUILD | 
3 +++ tensorflow/lite/kernels/internal/BUILD | 5 ++++- 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/tensorflow/lite/build_def.bzl b/tensorflow/lite/build_def.bzl index 9753e9bf35c57a..a6236f98da6f65 100644 --- a/tensorflow/lite/build_def.bzl +++ b/tensorflow/lite/build_def.bzl @@ -24,7 +24,10 @@ def tflite_copts(): "/wd4018", # -Wno-sign-compare ], "//conditions:default": [ + "-Wno-deprecated-declarations", "-Wno-sign-compare", + "-Wno-unused-const-variable", + "-Wno-unused-function", ], }) + select({ clean_dep("//tensorflow:optimized"): ["-O3"], diff --git a/tensorflow/lite/delegates/nnapi/BUILD b/tensorflow/lite/delegates/nnapi/BUILD index aa516543244f1a..180443c8b956c4 100644 --- a/tensorflow/lite/delegates/nnapi/BUILD +++ b/tensorflow/lite/delegates/nnapi/BUILD @@ -26,6 +26,9 @@ cc_library( "nnapi_delegate.h", "nnapi_delegate_kernel.h", ], + copts = [ + "-Wno-unused-private-field", + ], deps = [ "//tensorflow/lite:allocation", "//tensorflow/lite:kernel_api", diff --git a/tensorflow/lite/kernels/internal/BUILD b/tensorflow/lite/kernels/internal/BUILD index 50b936668092ee..f2dc8c91ea3afa 100644 --- a/tensorflow/lite/kernels/internal/BUILD +++ b/tensorflow/lite/kernels/internal/BUILD @@ -565,7 +565,10 @@ cc_library( "optimized/neon_tensor_utils.h", "optimized/neon_tensor_utils_impl.h", ], - copts = NEON_FLAGS_IF_APPLICABLE + HARD_FP_FLAGS_IF_APPLICABLE, + copts = NEON_FLAGS_IF_APPLICABLE + HARD_FP_FLAGS_IF_APPLICABLE + [ + "-Wno-deprecated-declarations", + "-Wno-unused-function", + ], deps = [ ":common", ":compatibility", From 2c4454f8c198dca10d2e0fcaa73a3885b0b9a6a6 Mon Sep 17 00:00:00 2001 From: mc Date: Thu, 2 Apr 2020 08:58:06 -0700 Subject: [PATCH 130/130] Pick in XNNPACK --- tensorflow/lite/delegates/xnnpack/BUILD | 83 ++ tensorflow/lite/delegates/xnnpack/README.md | 81 ++ .../lite/delegates/xnnpack/conv_2d_test.cc | 510 +++++++++++ .../xnnpack/depthwise_conv_2d_test.cc | 433 +++++++++ .../delegates/xnnpack/xnnpack_delegate.cc | 826 
++++++++++++++++++ .../lite/delegates/xnnpack/xnnpack_delegate.h | 47 + tensorflow/workspace.bzl | 10 + third_party/clog/BUILD.bazel | 36 + third_party/cpuinfo/BUILD.bazel | 223 +++++ third_party/cpuinfo/workspace.bzl | 15 + third_party/psimd/workspace.bzl | 15 + 11 files changed, 2279 insertions(+) create mode 100644 tensorflow/lite/delegates/xnnpack/BUILD create mode 100644 tensorflow/lite/delegates/xnnpack/README.md create mode 100644 tensorflow/lite/delegates/xnnpack/conv_2d_test.cc create mode 100644 tensorflow/lite/delegates/xnnpack/depthwise_conv_2d_test.cc create mode 100644 tensorflow/lite/delegates/xnnpack/xnnpack_delegate.cc create mode 100644 tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h create mode 100644 third_party/clog/BUILD.bazel create mode 100644 third_party/cpuinfo/BUILD.bazel create mode 100644 third_party/cpuinfo/workspace.bzl create mode 100644 third_party/psimd/workspace.bzl diff --git a/tensorflow/lite/delegates/xnnpack/BUILD b/tensorflow/lite/delegates/xnnpack/BUILD new file mode 100644 index 00000000000000..adf1d5b1fc6588 --- /dev/null +++ b/tensorflow/lite/delegates/xnnpack/BUILD @@ -0,0 +1,83 @@ +load("//tensorflow/lite:special_rules.bzl", "tflite_portable_test_suite_combined") + +package( + default_visibility = ["//visibility:public"], + licenses = ["notice"], # Apache 2.0 +) + +EMSCRIPTEN_LINKOPTS = [ + "-s ASSERTIONS=2", + "-s ERROR_ON_UNDEFINED_SYMBOLS=1", + "-s DEMANGLE_SUPPORT=1", + "-s EXIT_RUNTIME=1", + "-s ALLOW_MEMORY_GROWTH=1", + "-s TOTAL_MEMORY=134217728", +] + +cc_library( + name = "xnnpack_delegate", + srcs = ["xnnpack_delegate.cc"], + hdrs = ["xnnpack_delegate.h"], + deps = [ + "//tensorflow/lite:kernel_api", + "//tensorflow/lite:util", + "//tensorflow/lite/c:common", + "//tensorflow/lite/schema:schema_fbs", + "@XNNPACK", + ], +) + +############################## Integration tests ############################### + +cc_library( + name = "test_main", + testonly = 1, + linkopts = select({ + "//tensorflow:emscripten": 
EMSCRIPTEN_LINKOPTS, + "//conditions:default": [], + }), + deps = [ + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "conv_2d_test", + srcs = ["conv_2d_test.cc"], + linkopts = select({ + "//tensorflow:emscripten": EMSCRIPTEN_LINKOPTS, + "//conditions:default": [], + }), + deps = [ + ":test_main", + ":xnnpack_delegate", + "//tensorflow/lite:framework", + "//tensorflow/lite:schema_fbs_version", + "//tensorflow/lite/kernels:builtin_ops", + "//tensorflow/lite/schema:schema_fbs", + "@com_google_googletest//:gtest", + "@flatbuffers", + ], +) + +cc_test( + name = "depthwise_conv_2d_test", + srcs = ["depthwise_conv_2d_test.cc"], + linkopts = select({ + "//tensorflow:emscripten": EMSCRIPTEN_LINKOPTS, + "//conditions:default": [], + }), + tags = ["nomsan"], # b/145129478 + deps = [ + ":test_main", + ":xnnpack_delegate", + "//tensorflow/lite:framework", + "//tensorflow/lite:schema_fbs_version", + "//tensorflow/lite/kernels:builtin_ops", + "//tensorflow/lite/schema:schema_fbs", + "@com_google_googletest//:gtest", + "@flatbuffers", + ], +) + +tflite_portable_test_suite_combined(combine_conditions = {"deps": [":test_main"]}) diff --git a/tensorflow/lite/delegates/xnnpack/README.md b/tensorflow/lite/delegates/xnnpack/README.md new file mode 100644 index 00000000000000..3d3ffc88737744 --- /dev/null +++ b/tensorflow/lite/delegates/xnnpack/README.md @@ -0,0 +1,81 @@ +# XNNPACK backend for TensorFlow Lite + +XNNPACK is a highly optimized library of floating-point neural network +inference operators for ARM, WebAssembly, and x86 platforms. This document +describes how to use the XNNPACK library as a backend for TensorFlow Lite. + +## Enabling XNNPACK backend in TensorFlow Lite models + +XNNPACK integrates with TensorFlow Lite interpreter through the delegation +mechanism. 
To leverage XNNPACK library for acceleration, the users need to +create an XNNPACK delegate with the `TfLiteXNNPackDelegateCreate` function, +and call `Interpreter::ModifyGraphWithDelegate` to delegate supported parts of +the model to the XNNPACK delegate. The users must destroy the delegate with +`TfLiteXNNPackDelegateDelete` **after** releasing the TensorFlow Lite +interpreter. The snippet below illustrates the typical usage: + + +```c++ +// Build the interpreter +std::unique_ptr interpreter; +... + +// IMPORTANT: initialize options with TfLiteXNNPackDelegateOptionsDefault() for +// API-compatibility with future extensions of the TfLiteXNNPackDelegateOptions +// structure. +TfLiteXNNPackDelegateOptions xnnpack_options = + TfLiteXNNPackDelegateOptionsDefault(); +xnnpack_options.num_threads = num_threads; + +TfLiteDelegate* xnnpack_delegate = + TfLiteXNNPackDelegateCreate(&xnnpack_options); +if (interpreter->ModifyGraphWithDelegate(xnnpack_delegate) != kTfLiteOk) { + // Report error and fall back to another delegate, or the default backend +} + +... + +// Run inference using XNNPACK +interpreter->Invoke() + +... + +// IMPORTANT: release the interpreter before destroing the delegate +interpreter.reset(); +TfLiteXNNPackDelegateDelete(xnnpack_delegate); +``` + +## Limitations and supported operators + +XNNPACK delegate is a work-in-progress, and currently supports a limited set of +operators. Unsupported operators will fall back to the default implementations, +so models using a combination of supported and unsupported operators can still +benefit from XNNPACK delegate. + +Below is the list of current operators and limitations: + +### `CONV_2D` + +* Inputs and outputs must be in 32-bit floating-point format. +* Bias is mandatory. +* Both filter and bias must be static (use `kTfLiteMmapRo` allocation type). +* Fused `NONE`, `RELU`, `RELU_N1_TO_1`, and `RELU6` activations are supported, + but fused `TANH` and `SIGN_BIT` activations are not. 
+* Dynamically allocated (with `kTfLiteDynamic` allocation type) input and output + are not supported. + +### `DEPTHWISE_CONV_2D` + +* Inputs and outputs must be in 32-bit floating-point format. +* Bias is mandatory. +* Both filter and bias must be static (use `kTfLiteMmapRo` allocation type). +* Fused `NONE`, `RELU`, `RELU_N1_TO_1`, and `RELU6` activations are supported, + but fused `TANH` and `SIGN_BIT` activations are not. +* Dynamically allocated (with `kTfLiteDynamic` allocation type) input and output + are not supported. + +### Other limitations + +* Resizing model inputs (via `Interpreter::ResizeInputTensor`) is supported, but + causes a complete reinitialization of the delegate instance, which has + considerable overhead. diff --git a/tensorflow/lite/delegates/xnnpack/conv_2d_test.cc b/tensorflow/lite/delegates/xnnpack/conv_2d_test.cc new file mode 100644 index 00000000000000..bd17dff81924c2 --- /dev/null +++ b/tensorflow/lite/delegates/xnnpack/conv_2d_test.cc @@ -0,0 +1,510 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include +#include +#include +#include + +#include +#include "flatbuffers/flatbuffers.h" // TF:flatbuffers +#include "tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h" +#include "tensorflow/lite/interpreter.h" +#include "tensorflow/lite/kernels/register.h" +#include "tensorflow/lite/model.h" +#include "tensorflow/lite/schema/schema_generated.h" +#include "tensorflow/lite/version.h" + +namespace tflite { +namespace xnnpack { + +namespace { + +class Conv2DTester { + public: + Conv2DTester() = default; + Conv2DTester(const Conv2DTester&) = delete; + Conv2DTester& operator=(const Conv2DTester&) = delete; + + Conv2DTester& BatchSize(int32_t batch_size) { + EXPECT_GT(batch_size, 0); + batch_size_ = batch_size; + return *this; + } + + int32_t BatchSize() const { return batch_size_; } + + Conv2DTester& InputChannels(int32_t input_channels) { + EXPECT_GT(input_channels, 0); + input_channels_ = input_channels; + return *this; + } + + int32_t InputChannels() const { return input_channels_; } + + Conv2DTester& OutputChannels(int32_t output_channels) { + EXPECT_GT(output_channels, 0); + output_channels_ = output_channels; + return *this; + } + + int32_t OutputChannels() const { return output_channels_; } + + Conv2DTester& InputHeight(int32_t input_height) { + EXPECT_GT(input_height, 0); + input_height_ = input_height; + return *this; + } + + int32_t InputHeight() const { return input_height_; } + + Conv2DTester& InputWidth(int32_t input_width) { + EXPECT_GT(input_width, 0); + input_width_ = input_width; + return *this; + } + + int32_t InputWidth() const { return input_width_; } + + int32_t OutputWidth() const { + if (SamePadding()) { + return (InputWidth() - 1) / StrideWidth() + 1; + } else { + return (InputWidth() - (KernelWidth() - 1) * DilationWidth() - 1) / + StrideWidth() + + 1; + } + } + + int32_t OutputHeight() const { + if (SamePadding()) { + return (InputHeight() - 1) / 
StrideHeight() + 1; + } else { + return (InputHeight() - (KernelHeight() - 1) * DilationHeight() - 1) / + StrideHeight() + + 1; + } + } + + Conv2DTester& KernelHeight(int32_t kernel_height) { + EXPECT_GT(kernel_height, 0); + kernel_height_ = kernel_height; + return *this; + } + + int32_t KernelHeight() const { return kernel_height_; } + + Conv2DTester& KernelWidth(int32_t kernel_width) { + EXPECT_GT(kernel_width, 0); + kernel_width_ = kernel_width; + return *this; + } + + int32_t KernelWidth() const { return kernel_width_; } + + Conv2DTester& StrideHeight(int32_t stride_height) { + EXPECT_GT(stride_height, 0); + stride_height_ = stride_height; + return *this; + } + + int32_t StrideHeight() const { return stride_height_; } + + Conv2DTester& StrideWidth(int32_t stride_width) { + EXPECT_GT(stride_width, 0); + stride_width_ = stride_width; + return *this; + } + + int32_t StrideWidth() const { return stride_width_; } + + Conv2DTester& DilationHeight(int32_t dilation_height) { + EXPECT_GT(dilation_height, 0); + dilation_height_ = dilation_height; + return *this; + } + + int32_t DilationHeight() const { return dilation_height_; } + + Conv2DTester& DilationWidth(int32_t dilation_width) { + EXPECT_GT(dilation_width, 0); + dilation_width_ = dilation_width; + return *this; + } + + int32_t DilationWidth() const { return dilation_width_; } + + Conv2DTester& SamePadding(bool same_padding) { + same_padding_ = same_padding; + return *this; + } + + bool SamePadding() const { return same_padding_; } + + void Test(TfLiteDelegate* delegate) const { + std::random_device random_device; + auto rng = std::mt19937(random_device()); + auto f32rng = std::bind(std::uniform_real_distribution(), rng); + + std::vector buffer = CreateTfLiteModel(std::ref(f32rng)); + const Model* model = GetModel(buffer.data()); + + std::unique_ptr delegate_interpreter; + ASSERT_EQ( + InterpreterBuilder(model, ::tflite::ops::builtin::BuiltinOpResolver())( + &delegate_interpreter), + kTfLiteOk); + std::unique_ptr 
default_interpreter; + ASSERT_EQ( + InterpreterBuilder(model, ::tflite::ops::builtin::BuiltinOpResolver())( + &default_interpreter), + kTfLiteOk); + + ASSERT_TRUE(delegate_interpreter); + ASSERT_TRUE(default_interpreter); + + ASSERT_EQ(delegate_interpreter->inputs().size(), 1); + ASSERT_EQ(default_interpreter->inputs().size(), 1); + + ASSERT_EQ(delegate_interpreter->outputs().size(), 1); + ASSERT_EQ(default_interpreter->outputs().size(), 1); + + ASSERT_EQ(delegate_interpreter->AllocateTensors(), kTfLiteOk); + ASSERT_EQ(default_interpreter->AllocateTensors(), kTfLiteOk); + + ASSERT_EQ(delegate_interpreter->ModifyGraphWithDelegate(delegate), + kTfLiteOk); + + float* default_input_data = default_interpreter->typed_tensor( + default_interpreter->inputs()[0]); + std::generate(default_input_data, + default_input_data + BatchSize() * InputHeight() * + InputWidth() * InputChannels(), + std::ref(f32rng)); + + float* xnnpack_input_data = delegate_interpreter->typed_tensor( + delegate_interpreter->inputs()[0]); + std::copy(default_input_data, + default_input_data + + BatchSize() * InputHeight() * InputWidth() * InputChannels(), + xnnpack_input_data); + + default_interpreter->Invoke(); + delegate_interpreter->Invoke(); + + float* default_output_data = default_interpreter->typed_tensor( + default_interpreter->outputs()[0]); + float* xnnpack_output_data = delegate_interpreter->typed_tensor( + delegate_interpreter->outputs()[0]); + + for (size_t i = 0; + i < BatchSize() * OutputHeight() * OutputWidth() * OutputChannels(); + i++) { + ASSERT_NEAR(default_output_data[i], xnnpack_output_data[i], + std::numeric_limits::epsilon() * + std::max(std::abs(default_output_data[i]) * 25.0f, 1.0f)); + } + } + + private: + std::vector CreateTfLiteModel(std::function f32rng) const { + flatbuffers::FlatBufferBuilder builder; + flatbuffers::Offset operator_code = + CreateOperatorCode(builder, BuiltinOperator_CONV_2D, 0); + + flatbuffers::Offset conv2d_options = CreateConv2DOptions( + builder, 
SamePadding() ? tflite::Padding_SAME : tflite::Padding_VALID, + StrideWidth(), StrideHeight(), ActivationFunctionType_NONE, + DilationWidth(), DilationHeight()); + + std::vector filter_data(OutputChannels() * KernelHeight() * + KernelWidth() * InputChannels()); + std::vector bias_data(OutputChannels()); + + std::generate(filter_data.begin(), filter_data.end(), f32rng); + std::generate(bias_data.begin(), bias_data.end(), f32rng); + + flatbuffers::Offset buffers[3] = { + CreateBuffer(builder, builder.CreateVector({})), + CreateBuffer(builder, + builder.CreateVector( + reinterpret_cast(filter_data.data()), + sizeof(float) * filter_data.size())), + CreateBuffer(builder, + builder.CreateVector( + reinterpret_cast(bias_data.data()), + sizeof(float) * bias_data.size())), + }; + + const int32_t input_shape[4] = {BatchSize(), InputHeight(), InputWidth(), + InputChannels()}; + const int32_t output_shape[4] = {BatchSize(), OutputHeight(), OutputWidth(), + OutputChannels()}; + const int32_t filter_shape[4] = {OutputChannels(), KernelHeight(), + KernelWidth(), InputChannels()}; + const int32_t bias_shape[1] = {OutputChannels()}; + + flatbuffers::Offset tensors[4] = { + CreateTensor(builder, builder.CreateVector(input_shape, 4), + TensorType_FLOAT32, /*buffer=*/0, + builder.CreateString("X")), + CreateTensor(builder, builder.CreateVector(filter_shape, 4), + TensorType_FLOAT32, /*buffer=*/1, + builder.CreateString("W")), + CreateTensor(builder, builder.CreateVector(bias_shape, 1), + TensorType_FLOAT32, /*buffer=*/2, + builder.CreateString("b")), + CreateTensor(builder, builder.CreateVector(output_shape, 4), + TensorType_FLOAT32, /*buffer=*/0, + builder.CreateString("Y")), + }; + + const int32_t op_inputs[3] = {0, 1, 2}; + const int32_t op_outputs[1] = {3}; + + flatbuffers::Offset op = + CreateOperator(builder, /*opcode_index=*/0, + builder.CreateVector(op_inputs, 3), + builder.CreateVector(op_outputs, 1), + BuiltinOptions_Conv2DOptions, conv2d_options.Union()); + + int32_t 
subgraph_inputs[1] = {0}; + int32_t subgraph_outputs[1] = {3}; + flatbuffers::Offset subgraph = + CreateSubGraph(builder, builder.CreateVector(tensors, 4), + builder.CreateVector(subgraph_inputs, 1), + builder.CreateVector(subgraph_outputs, 1), + builder.CreateVector(&op, 1), /*name=*/0); + + flatbuffers::Offset description = + builder.CreateString("Conv2D model"); + + flatbuffers::Offset model_buffer = CreateModel( + builder, TFLITE_SCHEMA_VERSION, builder.CreateVector(&operator_code, 1), + builder.CreateVector(&subgraph, 1), description, + builder.CreateVector(buffers, 3)); + + builder.Finish(model_buffer); + + return std::vector(builder.GetBufferPointer(), + builder.GetBufferPointer() + builder.GetSize()); + } + + int32_t batch_size_ = 1; + int32_t input_channels_ = 1; + int32_t output_channels_ = 1; + int32_t input_height_ = 1; + int32_t input_width_ = 1; + int32_t kernel_height_ = 1; + int32_t kernel_width_ = 1; + int32_t stride_height_ = 1; + int32_t stride_width_ = 1; + int32_t dilation_height_ = 1; + int32_t dilation_width_ = 1; + bool same_padding_ = true; +}; + +} // namespace + +TEST(Conv2D, Pointwise) { + std::unique_ptr + xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), + TfLiteXNNPackDelegateDelete); + + std::random_device random_device; + auto rng = std::mt19937(random_device()); + auto input_rng = + std::bind(std::uniform_int_distribution(5, 25), std::ref(rng)); + auto channel_rng = + std::bind(std::uniform_int_distribution(1, 16), std::ref(rng)); + + Conv2DTester() + .InputHeight(input_rng()) + .InputWidth(input_rng()) + .InputChannels(channel_rng()) + .OutputChannels(channel_rng()) + .KernelHeight(1) + .KernelWidth(1) + .Test(xnnpack_delegate.get()); +} + +TEST(Conv2D, SmallKernelWithSamePadding) { + std::unique_ptr + xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), + TfLiteXNNPackDelegateDelete); + + std::random_device random_device; + auto rng = std::mt19937(random_device()); + auto input_rng = + 
std::bind(std::uniform_int_distribution(10, 25), std::ref(rng)); + auto kernel_rng = + std::bind(std::uniform_int_distribution(2, 7), std::ref(rng)); + auto channel_rng = + std::bind(std::uniform_int_distribution(1, 16), std::ref(rng)); + + Conv2DTester() + .InputHeight(input_rng()) + .InputWidth(input_rng()) + .InputChannels(channel_rng()) + .OutputChannels(channel_rng()) + .KernelHeight(kernel_rng()) + .KernelWidth(kernel_rng()) + .SamePadding(true) + .Test(xnnpack_delegate.get()); +} + +TEST(Conv2D, SmallKernelWithValidPadding) { + std::unique_ptr + xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), + TfLiteXNNPackDelegateDelete); + + std::random_device random_device; + auto rng = std::mt19937(random_device()); + auto input_rng = + std::bind(std::uniform_int_distribution(10, 25), std::ref(rng)); + auto kernel_rng = + std::bind(std::uniform_int_distribution(2, 7), std::ref(rng)); + auto channel_rng = + std::bind(std::uniform_int_distribution(1, 16), std::ref(rng)); + + Conv2DTester() + .InputHeight(input_rng()) + .InputWidth(input_rng()) + .InputChannels(channel_rng()) + .OutputChannels(channel_rng()) + .KernelHeight(kernel_rng()) + .KernelWidth(kernel_rng()) + .SamePadding(false) + .Test(xnnpack_delegate.get()); +} + +TEST(Conv2D, StrideWithSamePadding) { + std::unique_ptr + xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), + TfLiteXNNPackDelegateDelete); + + std::random_device random_device; + auto rng = std::mt19937(random_device()); + auto input_rng = + std::bind(std::uniform_int_distribution(10, 25), std::ref(rng)); + auto kernel_rng = + std::bind(std::uniform_int_distribution(3, 5), std::ref(rng)); + auto stride_rng = + std::bind(std::uniform_int_distribution(2, 3), std::ref(rng)); + auto channel_rng = + std::bind(std::uniform_int_distribution(1, 16), std::ref(rng)); + + Conv2DTester() + .InputHeight(input_rng()) + .InputWidth(input_rng()) + .InputChannels(channel_rng()) + .OutputChannels(channel_rng()) + .KernelHeight(kernel_rng()) + 
.KernelWidth(kernel_rng()) + .StrideHeight(stride_rng()) + .StrideWidth(stride_rng()) + .SamePadding(true) + .Test(xnnpack_delegate.get()); +} + +TEST(Conv2D, StrideWithValidPadding) { + std::unique_ptr + xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), + TfLiteXNNPackDelegateDelete); + + std::random_device random_device; + auto rng = std::mt19937(random_device()); + auto input_rng = + std::bind(std::uniform_int_distribution(10, 25), std::ref(rng)); + auto kernel_rng = + std::bind(std::uniform_int_distribution(3, 5), std::ref(rng)); + auto stride_rng = + std::bind(std::uniform_int_distribution(2, 3), std::ref(rng)); + auto channel_rng = + std::bind(std::uniform_int_distribution(1, 16), std::ref(rng)); + + Conv2DTester() + .InputHeight(input_rng()) + .InputWidth(input_rng()) + .InputChannels(channel_rng()) + .OutputChannels(channel_rng()) + .KernelHeight(kernel_rng()) + .KernelWidth(kernel_rng()) + .StrideHeight(stride_rng()) + .StrideWidth(stride_rng()) + .SamePadding(false) + .Test(xnnpack_delegate.get()); +} + +TEST(Conv2D, DilationWithSamePadding) { + std::unique_ptr + xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), + TfLiteXNNPackDelegateDelete); + + std::random_device random_device; + auto rng = std::mt19937(random_device()); + auto input_rng = + std::bind(std::uniform_int_distribution(10, 25), std::ref(rng)); + auto kernel_rng = + std::bind(std::uniform_int_distribution(2, 3), std::ref(rng)); + auto dilation_rng = + std::bind(std::uniform_int_distribution(2, 3), std::ref(rng)); + auto channel_rng = + std::bind(std::uniform_int_distribution(1, 16), std::ref(rng)); + + Conv2DTester() + .InputHeight(input_rng()) + .InputWidth(input_rng()) + .InputChannels(channel_rng()) + .OutputChannels(channel_rng()) + .KernelHeight(kernel_rng()) + .KernelWidth(kernel_rng()) + .DilationHeight(dilation_rng()) + .DilationWidth(dilation_rng()) + .SamePadding(true) + .Test(xnnpack_delegate.get()); +} + +TEST(Conv2D, DilationWithValidPadding) { + std::unique_ptr + 
xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), + TfLiteXNNPackDelegateDelete); + + std::random_device random_device; + auto rng = std::mt19937(random_device()); + auto input_rng = + std::bind(std::uniform_int_distribution(10, 25), std::ref(rng)); + auto kernel_rng = + std::bind(std::uniform_int_distribution(2, 3), std::ref(rng)); + auto dilation_rng = + std::bind(std::uniform_int_distribution(2, 3), std::ref(rng)); + auto channel_rng = + std::bind(std::uniform_int_distribution(1, 16), std::ref(rng)); + + Conv2DTester() + .InputHeight(input_rng()) + .InputWidth(input_rng()) + .InputChannels(channel_rng()) + .OutputChannels(channel_rng()) + .KernelHeight(kernel_rng()) + .KernelWidth(kernel_rng()) + .DilationHeight(dilation_rng()) + .DilationWidth(dilation_rng()) + .SamePadding(false) + .Test(xnnpack_delegate.get()); +} + +} // namespace xnnpack +} // namespace tflite diff --git a/tensorflow/lite/delegates/xnnpack/depthwise_conv_2d_test.cc b/tensorflow/lite/delegates/xnnpack/depthwise_conv_2d_test.cc new file mode 100644 index 00000000000000..3fb520466e04f7 --- /dev/null +++ b/tensorflow/lite/delegates/xnnpack/depthwise_conv_2d_test.cc @@ -0,0 +1,433 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include +#include +#include +#include + +#include +#include "flatbuffers/flatbuffers.h" // TF:flatbuffers +#include "tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h" +#include "tensorflow/lite/interpreter.h" +#include "tensorflow/lite/kernels/register.h" +#include "tensorflow/lite/model.h" +#include "tensorflow/lite/schema/schema_generated.h" +#include "tensorflow/lite/version.h" + +namespace tflite { +namespace xnnpack { + +namespace { + +class DepthwiseConv2DTester { + public: + DepthwiseConv2DTester() = default; + DepthwiseConv2DTester(const DepthwiseConv2DTester&) = delete; + DepthwiseConv2DTester& operator=(const DepthwiseConv2DTester&) = delete; + + DepthwiseConv2DTester& BatchSize(int32_t batch_size) { + EXPECT_GT(batch_size, 0); + batch_size_ = batch_size; + return *this; + } + + int32_t BatchSize() const { return batch_size_; } + + DepthwiseConv2DTester& Groups(int32_t groups) { + EXPECT_GT(groups, 0); + groups_ = groups; + return *this; + } + + int32_t Groups() const { return groups_; } + + DepthwiseConv2DTester& DepthMultiplier(int32_t depth_multiplier) { + EXPECT_GT(depth_multiplier, 0); + depth_multiplier_ = depth_multiplier; + return *this; + } + + int32_t DepthMultiplier() const { return depth_multiplier_; } + + int32_t InputChannels() const { return Groups(); } + + int32_t OutputChannels() const { return DepthMultiplier() * Groups(); } + + DepthwiseConv2DTester& InputHeight(int32_t input_height) { + EXPECT_GT(input_height, 0); + input_height_ = input_height; + return *this; + } + + int32_t InputHeight() const { return input_height_; } + + DepthwiseConv2DTester& InputWidth(int32_t input_width) { + EXPECT_GT(input_width, 0); + input_width_ = input_width; + return *this; + } + + int32_t InputWidth() const { return input_width_; } + + int32_t OutputWidth() const { + const int32_t output_width = (InputWidth() - 1) / StrideWidth() + 1; + EXPECT_GT(output_width, 0); 
+ return output_width; + } + + int32_t OutputHeight() const { + const int32_t output_height = (InputHeight() - 1) / StrideHeight() + 1; + EXPECT_GT(output_height, 0); + return output_height; + } + + DepthwiseConv2DTester& KernelHeight(int32_t kernel_height) { + EXPECT_GT(kernel_height, 0); + kernel_height_ = kernel_height; + return *this; + } + + int32_t KernelHeight() const { return kernel_height_; } + + DepthwiseConv2DTester& KernelWidth(int32_t kernel_width) { + EXPECT_GT(kernel_width, 0); + kernel_width_ = kernel_width; + return *this; + } + + int32_t KernelWidth() const { return kernel_width_; } + + DepthwiseConv2DTester& StrideHeight(int32_t stride_height) { + EXPECT_GT(stride_height, 0); + stride_height_ = stride_height; + return *this; + } + + int32_t StrideHeight() const { return stride_height_; } + + DepthwiseConv2DTester& StrideWidth(int32_t stride_width) { + EXPECT_GT(stride_width, 0); + stride_width_ = stride_width; + return *this; + } + + int32_t StrideWidth() const { return stride_width_; } + + DepthwiseConv2DTester& DilationHeight(int32_t dilation_height) { + EXPECT_GT(dilation_height, 0); + dilation_height_ = dilation_height; + return *this; + } + + int32_t DilationHeight() const { return dilation_height_; } + + DepthwiseConv2DTester& DilationWidth(int32_t dilation_width) { + EXPECT_GT(dilation_width, 0); + dilation_width_ = dilation_width; + return *this; + } + + int32_t DilationWidth() const { return dilation_width_; } + + void Test(TfLiteDelegate* delegate) const { + ASSERT_EQ(DepthMultiplier(), 1) << "Flow does not support depth multiplier"; + + std::random_device random_device; + auto rng = std::mt19937(random_device()); + auto f32rng = std::bind(std::uniform_real_distribution(), rng); + + std::vector buffer = CreateTfLiteModel(std::ref(f32rng)); + const Model* model = GetModel(buffer.data()); + + std::unique_ptr delegate_interpreter; + ASSERT_EQ( + InterpreterBuilder(model, ::tflite::ops::builtin::BuiltinOpResolver())( + 
&delegate_interpreter), + kTfLiteOk); + std::unique_ptr default_interpreter; + ASSERT_EQ( + InterpreterBuilder(model, ::tflite::ops::builtin::BuiltinOpResolver())( + &default_interpreter), + kTfLiteOk); + + ASSERT_TRUE(delegate_interpreter); + ASSERT_TRUE(default_interpreter); + + ASSERT_EQ(delegate_interpreter->inputs().size(), 1); + ASSERT_EQ(default_interpreter->inputs().size(), 1); + + ASSERT_EQ(delegate_interpreter->outputs().size(), 1); + ASSERT_EQ(default_interpreter->outputs().size(), 1); + + ASSERT_EQ(delegate_interpreter->AllocateTensors(), kTfLiteOk); + ASSERT_EQ(default_interpreter->AllocateTensors(), kTfLiteOk); + + ASSERT_EQ(delegate_interpreter->ModifyGraphWithDelegate(delegate), + kTfLiteOk); + + float* default_input_data = default_interpreter->typed_tensor( + default_interpreter->inputs()[0]); + std::generate(default_input_data, + default_input_data + BatchSize() * InputChannels() * + InputHeight() * InputWidth(), + std::ref(f32rng)); + + float* xnnpack_input_data = delegate_interpreter->typed_tensor( + delegate_interpreter->inputs()[0]); + std::copy(default_input_data, + default_input_data + + BatchSize() * InputChannels() * InputHeight() * InputWidth(), + xnnpack_input_data); + + default_interpreter->Invoke(); + delegate_interpreter->Invoke(); + + float* default_output_data = default_interpreter->typed_tensor( + default_interpreter->outputs()[0]); + float* xnnpack_output_data = delegate_interpreter->typed_tensor( + delegate_interpreter->outputs()[0]); + + for (size_t i = 0; + i < BatchSize() * OutputChannels() * OutputHeight() * OutputWidth(); + i++) { + ASSERT_NEAR(default_output_data[i], xnnpack_output_data[i], + std::numeric_limits::epsilon() * + std::max(std::abs(default_output_data[i]) * 10.0f, 1.0f)); + } + } + + private: + std::vector CreateTfLiteModel(std::function f32rng) const { + flatbuffers::FlatBufferBuilder builder; + flatbuffers::Offset operator_code = + CreateOperatorCode(builder, BuiltinOperator_DEPTHWISE_CONV_2D, 0); + + 
flatbuffers::Offset depthwise_conv2d_options = + CreateDepthwiseConv2DOptions(builder, Padding_SAME, StrideWidth(), + StrideHeight(), DepthMultiplier(), + ActivationFunctionType_NONE, + DilationWidth(), DilationHeight()); + + std::vector filter_data(KernelHeight() * KernelWidth() * + OutputChannels()); + std::vector bias_data(OutputChannels()); + + std::generate(filter_data.begin(), filter_data.end(), f32rng); + std::generate(bias_data.begin(), bias_data.end(), f32rng); + + flatbuffers::Offset buffers[3] = { + CreateBuffer(builder, builder.CreateVector({})), + CreateBuffer(builder, + builder.CreateVector( + reinterpret_cast(filter_data.data()), + sizeof(float) * filter_data.size())), + CreateBuffer(builder, + builder.CreateVector( + reinterpret_cast(bias_data.data()), + sizeof(float) * bias_data.size())), + }; + + const int32_t input_shape[4] = {BatchSize(), InputHeight(), InputWidth(), + InputChannels()}; + const int32_t output_shape[4] = {BatchSize(), OutputHeight(), OutputWidth(), + OutputChannels()}; + const int32_t filter_shape[4] = {1, KernelHeight(), KernelWidth(), + OutputChannels()}; + const int32_t bias_shape[1] = {OutputChannels()}; + + flatbuffers::Offset tensors[4] = { + CreateTensor(builder, builder.CreateVector(input_shape, 4), + TensorType_FLOAT32, /*buffer=*/0, + builder.CreateString("X")), + CreateTensor(builder, builder.CreateVector(filter_shape, 4), + TensorType_FLOAT32, /*buffer=*/1, + builder.CreateString("W")), + CreateTensor(builder, builder.CreateVector(bias_shape, 1), + TensorType_FLOAT32, /*buffer=*/2, + builder.CreateString("b")), + CreateTensor(builder, builder.CreateVector(output_shape, 4), + TensorType_FLOAT32, /*buffer=*/0, + builder.CreateString("Y")), + }; + + const int32_t op_inputs[3] = {0, 1, 2}; + const int32_t op_outputs[1] = {3}; + + flatbuffers::Offset op = CreateOperator( + builder, /*opcode_index=*/0, + builder.CreateVector(op_inputs, 3), + builder.CreateVector(op_outputs, 1), + BuiltinOptions_DepthwiseConv2DOptions, 
depthwise_conv2d_options.Union(), + /*custom_options=*/0, CustomOptionsFormat_FLEXBUFFERS); + + int32_t subgraph_inputs[1] = {0}; + int32_t subgraph_outputs[1] = {3}; + flatbuffers::Offset subgraph = + CreateSubGraph(builder, builder.CreateVector(tensors, 4), + builder.CreateVector(subgraph_inputs, 1), + builder.CreateVector(subgraph_outputs, 1), + builder.CreateVector(&op, 1), /*name=*/0); + + flatbuffers::Offset description = + builder.CreateString("DepthwiseConv2D model"); + + flatbuffers::Offset model_buffer = CreateModel( + builder, TFLITE_SCHEMA_VERSION, builder.CreateVector(&operator_code, 1), + builder.CreateVector(&subgraph, 1), description, + builder.CreateVector(buffers, 3)); + + builder.Finish(model_buffer); + + return std::vector(builder.GetBufferPointer(), + builder.GetBufferPointer() + builder.GetSize()); + } + + int32_t batch_size_ = 1; + int32_t groups_ = 1; + int32_t depth_multiplier_ = 1; + int32_t input_height_ = 1; + int32_t input_width_ = 1; + int32_t kernel_height_ = 1; + int32_t kernel_width_ = 1; + int32_t stride_height_ = 1; + int32_t stride_width_ = 1; + int32_t dilation_height_ = 1; + int32_t dilation_width_ = 1; +}; + +} // namespace + +TEST(DepthwiseConv2D, 2x2) { + std::unique_ptr + xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), + TfLiteXNNPackDelegateDelete); + + std::random_device random_device; + auto rng = std::mt19937(random_device()); + auto input_rng = + std::bind(std::uniform_int_distribution(5, 25), std::ref(rng)); + auto groups_rng = + std::bind(std::uniform_int_distribution(2, 32), std::ref(rng)); + + DepthwiseConv2DTester() + .InputHeight(input_rng()) + .InputWidth(input_rng()) + .Groups(groups_rng()) + .KernelHeight(2) + .KernelWidth(2) + .Test(xnnpack_delegate.get()); +} + +TEST(DepthwiseConv2D, 3x3) { + std::unique_ptr + xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), + TfLiteXNNPackDelegateDelete); + + std::random_device random_device; + auto rng = std::mt19937(random_device()); + auto input_rng = + 
std::bind(std::uniform_int_distribution(5, 25), std::ref(rng)); + auto groups_rng = + std::bind(std::uniform_int_distribution(2, 32), std::ref(rng)); + + DepthwiseConv2DTester() + .InputHeight(input_rng()) + .InputWidth(input_rng()) + .Groups(groups_rng()) + .KernelHeight(3) + .KernelWidth(3) + .Test(xnnpack_delegate.get()); +} + +TEST(DepthwiseConv2D, SmallKernel) { + std::unique_ptr + xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), + TfLiteXNNPackDelegateDelete); + + std::random_device random_device; + auto rng = std::mt19937(random_device()); + auto input_rng = + std::bind(std::uniform_int_distribution(10, 25), std::ref(rng)); + auto kernel_rng = + std::bind(std::uniform_int_distribution(2, 7), std::ref(rng)); + auto groups_rng = + std::bind(std::uniform_int_distribution(2, 32), std::ref(rng)); + + DepthwiseConv2DTester() + .InputHeight(input_rng()) + .InputWidth(input_rng()) + .Groups(groups_rng()) + .KernelHeight(kernel_rng()) + .KernelWidth(kernel_rng()) + .Test(xnnpack_delegate.get()); +} + +TEST(DepthwiseConv2D, Stride) { + std::unique_ptr + xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), + TfLiteXNNPackDelegateDelete); + + std::random_device random_device; + auto rng = std::mt19937(random_device()); + auto input_rng = + std::bind(std::uniform_int_distribution(10, 25), std::ref(rng)); + auto kernel_rng = + std::bind(std::uniform_int_distribution(3, 5), std::ref(rng)); + auto stride_rng = + std::bind(std::uniform_int_distribution(2, 3), std::ref(rng)); + auto groups_rng = + std::bind(std::uniform_int_distribution(2, 32), std::ref(rng)); + + DepthwiseConv2DTester() + .InputHeight(input_rng()) + .InputWidth(input_rng()) + .Groups(groups_rng()) + .KernelHeight(kernel_rng()) + .KernelWidth(kernel_rng()) + .StrideHeight(stride_rng()) + .StrideWidth(stride_rng()) + .Test(xnnpack_delegate.get()); +} + +TEST(DepthwiseConv2D, Dilation) { + std::unique_ptr + xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), + TfLiteXNNPackDelegateDelete); + + 
std::random_device random_device; + auto rng = std::mt19937(random_device()); + auto input_rng = + std::bind(std::uniform_int_distribution(10, 25), std::ref(rng)); + auto kernel_rng = + std::bind(std::uniform_int_distribution(2, 3), std::ref(rng)); + auto dilation_rng = + std::bind(std::uniform_int_distribution(2, 3), std::ref(rng)); + auto group_rng = + std::bind(std::uniform_int_distribution(3, 32), std::ref(rng)); + + DepthwiseConv2DTester() + .InputHeight(input_rng()) + .InputWidth(input_rng()) + .Groups(group_rng()) + .KernelHeight(kernel_rng()) + .KernelWidth(kernel_rng()) + .DilationHeight(dilation_rng()) + .DilationWidth(dilation_rng()) + .Test(xnnpack_delegate.get()); +} + +} // namespace xnnpack +} // namespace tflite diff --git a/tensorflow/lite/delegates/xnnpack/xnnpack_delegate.cc b/tensorflow/lite/delegates/xnnpack/xnnpack_delegate.cc new file mode 100644 index 00000000000000..330aacea042a83 --- /dev/null +++ b/tensorflow/lite/delegates/xnnpack/xnnpack_delegate.cc @@ -0,0 +1,826 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include "tensorflow/lite/builtin_ops.h" +#include "tensorflow/lite/c/builtin_op_data.h" +#include "tensorflow/lite/c/common.h" + +namespace tflite { +namespace xnnpack { +namespace { + +// Forward declaration. +TfLiteStatus DelegatePrepare(TfLiteContext* context, TfLiteDelegate* delegate); + +class Delegate { + public: + explicit Delegate(const TfLiteXNNPackDelegateOptions* options) { + if (options) { + options_ = *options; + } else { + // default: don't use thread pool. + options_.num_threads = 0; + } + } + + TfLiteDelegate* tflite_delegate() { return &delegate_; } + + private: + TfLiteDelegate delegate_ = { + reinterpret_cast(this), // .data_ + DelegatePrepare, // .Prepare + nullptr, // .CopyFromBufferHandle + nullptr, // .CopyToBufferHandle + nullptr, // .FreeBufferHandle + kTfLiteDelegateFlagsNone, // .flags + }; + + TfLiteXNNPackDelegateOptions options_; +}; + +class Subgraph { + public: + static Subgraph* Create(TfLiteContext* context, + const TfLiteDelegateParams* params) { + // Convert subgraph inputs and outputs to hash sets for faster lookup. 
+ const std::unordered_set inputs( + ¶ms->input_tensors->data[0], + ¶ms->input_tensors->data[params->input_tensors->size]); + const std::unordered_set outputs( + ¶ms->output_tensors->data[0], + ¶ms->output_tensors->data[params->output_tensors->size]); + std::unordered_set externals(outputs); + + TfLiteIntArray* execution_plan; + if (context->GetExecutionPlan(context, &execution_plan) != kTfLiteOk) { + return nullptr; + } + + xnn_subgraph_t subgraph_ptr = nullptr; + xnn_status status = xnn_create_subgraph( + /*external_value_ids=*/context->tensors_size, /*flags=*/0, + &subgraph_ptr); + if (status != xnn_status_success) { + context->ReportError(context, "failed to create XNNPACK subgraph"); + return nullptr; + } + + // Smart pointer to automatically release subgraph on exit. + std::unique_ptr subgraph( + subgraph_ptr, &xnn_delete_subgraph); + + // Detect which tensors are used as inputs or outputs of any subgraph nodes. + // -1 denotes tensor not used in the subgraph. These indexes will be + // filtered out and removed later. + std::vector tensors(context->tensors_size, -1); + for (int i = 0; i < params->nodes_to_replace->size; i++) { + TfLiteNode* node = nullptr; + TfLiteRegistration* registration = nullptr; + if (context->GetNodeAndRegistration(context, + params->nodes_to_replace->data[i], + &node, ®istration) != kTfLiteOk) { + return nullptr; + } + + for (int k = 0; k < node->inputs->size; k++) { + const int t = node->inputs->data[k]; + tensors[t] = t; + } + for (int k = 0; k < node->outputs->size; k++) { + const int t = node->outputs->data[k]; + tensors[t] = t; + } + } + // Filter out and remove -1 (unused) indexes. 
+ tensors.erase(std::remove_if(tensors.begin(), tensors.end(), + [](int i) { return i < 0; }), + tensors.end()); + std::sort(tensors.begin(), tensors.end()); + + // XNNPACK Value IDs for TFLite tensors + std::vector xnnpack_tensors(tensors.back() + 1); + for (int t : tensors) { + if (context->tensors[t].type != kTfLiteFloat32) { + context->ReportError( + context, + "unsupported datatype (%s) of tensor %d in XNNPACK delegate", + TfLiteTypeGetName(context->tensors[t].type), t); + return nullptr; + } + + uint32_t flags = 0; + const void* data = nullptr; + if (context->tensors[t].allocation_type == kTfLiteMmapRo) { + data = context->tensors[t].data.raw_const; + } + if (inputs.count(t) != 0) { + flags |= XNN_VALUE_FLAG_EXTERNAL_INPUT; + if (data == nullptr) { + externals.insert(t); + } + } + if (outputs.count(t) != 0) { + flags |= XNN_VALUE_FLAG_EXTERNAL_OUTPUT; + } + + std::vector dims( + &context->tensors[t].dims->data[0], + &context->tensors[t].dims->data[context->tensors[t].dims->size]); + + const xnn_status status = xnn_define_tensor_value( + subgraph.get(), xnn_datatype_fp32, dims.size(), dims.data(), data, + static_cast(t), flags, &xnnpack_tensors[t]); + if (status != xnn_status_success) { + context->ReportError(context, + "failed to create XNNPACK Value for tensor %d", t); + return nullptr; + } + } + + // Create XNNPACK nodes for TFLite delegate nodes + for (int i = 0; i < params->nodes_to_replace->size; i++) { + TfLiteNode* node = nullptr; + TfLiteRegistration* registration = nullptr; + if (context->GetNodeAndRegistration(context, + params->nodes_to_replace->data[i], + &node, ®istration) != kTfLiteOk) { + return nullptr; + } + + if (VisitNode(subgraph.get(), context, registration, node, i, + xnnpack_tensors) != kTfLiteOk) { + return nullptr; + } + } + + xnn_runtime_t runtime_ptr = nullptr; + status = xnn_create_runtime(subgraph.get(), &runtime_ptr); + if (status != xnn_status_success) { + context->ReportError(context, "failed to create XNNPACK runtime"); + 
return nullptr; + } + + return new Subgraph(runtime_ptr, std::move(externals)); + } + + TfLiteStatus Prepare(TfLiteContext* context) { return kTfLiteOk; } + + TfLiteStatus Invoke(TfLiteContext* context) { + if (first_run_) { + std::vector external_values; + for (int t : externals_) { + xnn_external_value value = {0}; + value.id = static_cast(t); + value.data = context->tensors[t].data.raw; + external_values.push_back(value); + } + + const xnn_status status = xnn_setup_runtime( + runtime_.get(), external_values.size(), external_values.data()); + if (status != xnn_status_success) { + context->ReportError(context, "failed to setup XNNPACK runtime"); + return kTfLiteError; + } + + first_run_ = false; + } + + const xnn_status status = xnn_invoke_runtime(runtime_.get()); + if (status != xnn_status_success) { + context->ReportError(context, "failed to invoke XNNPACK runtime"); + return kTfLiteError; + } + + return kTfLiteOk; + } + + static TfLiteStatus CalculatePadding(TfLiteContext* context, + TfLitePadding padding, uint32_t* flags, + int node_index) { + switch (padding) { + case kTfLitePaddingSame: { + *flags = XNN_FLAG_TENSORFLOW_SAME_PADDING; + return kTfLiteOk; + } + case kTfLitePaddingValid: + *flags = 0; + return kTfLiteOk; + default: + if (context != nullptr) { + context->ReportError(context, "invalid padding mode (%d) in node #%d", + static_cast(padding), node_index); + } + return kTfLiteError; + } + } + + static TfLiteStatus ConvertActivationToOutputRange( + TfLiteContext* context, int node_index, TfLiteFusedActivation activation, + float* output_min, float* output_max) { + switch (activation) { + case kTfLiteActNone: + *output_min = -std::numeric_limits::infinity(); + *output_max = +std::numeric_limits::infinity(); + return kTfLiteOk; + case kTfLiteActRelu: + *output_min = 0.0f; + *output_max = +std::numeric_limits::infinity(); + return kTfLiteOk; + case kTfLiteActRelu1: + *output_min = -1.0f; + *output_max = +1.0f; + return kTfLiteOk; + case kTfLiteActRelu6: + 
*output_min = 0.0f; + *output_max = 6.0f; + return kTfLiteOk; + case kTfLiteActTanh: + if (context != nullptr) { + context->ReportError( + context, "unsupported fused activation (Tanh) in node #%d", + node_index); + } + return kTfLiteError; + case kTfLiteActSignBit: + if (context != nullptr) { + context->ReportError( + context, "unsupported fused activation (Sign) in node #%d", + node_index); + } + return kTfLiteError; + case kTfLiteActSigmoid: + if (context != nullptr) { + context->ReportError( + context, "unsupported fused activation (Sigmoid) in node #%d", + node_index); + } + return kTfLiteError; + default: + if (context != nullptr) { + context->ReportError(context, + "invalid fused activation (%d) in node #%d", + static_cast(activation), node_index); + } + return kTfLiteError; + } + } + + static TfLiteStatus CheckConvolutionParams(TfLiteContext* context, + const TfLiteConvParams* params, + int node_index) { + if (params->stride_width <= 0) { + if (context != nullptr) { + context->ReportError(context, "invalid stride width %d in node #%d", + params->stride_width, node_index); + } + return kTfLiteError; + } + if (params->stride_height <= 0) { + if (context != nullptr) { + context->ReportError(context, "invalid stride height %d in node #%d", + params->stride_height, node_index); + } + return kTfLiteError; + } + + if (params->dilation_width_factor <= 0) { + if (context != nullptr) { + context->ReportError(context, + "invalid dilation width factor %d in node #%d", + params->dilation_width_factor, node_index); + } + return kTfLiteError; + } + if (params->dilation_height_factor <= 0) { + if (context != nullptr) { + context->ReportError(context, + "invalid dilation height factor %d in node #%d", + params->dilation_height_factor, node_index); + } + return kTfLiteError; + } + + return kTfLiteOk; + } + + static TfLiteStatus CheckDepthwiseConvolutionParams( + TfLiteContext* context, const TfLiteDepthwiseConvParams* params, + int output_channels, int node_index) { + if 
(params->stride_width <= 0) { + if (context != nullptr) { + context->ReportError(context, "invalid stride width %d in node #%d", + params->stride_width, node_index); + } + return kTfLiteError; + } + if (params->stride_height <= 0) { + if (context != nullptr) { + context->ReportError(context, "invalid stride height %d in node #%d", + params->stride_height, node_index); + } + return kTfLiteError; + } + + if (params->depth_multiplier <= 0) { + if (context != nullptr) { + context->ReportError(context, "invalid depth multiplier %d in node #%d", + params->depth_multiplier, node_index); + } + return kTfLiteError; + } + if (output_channels % params->depth_multiplier != 0) { + if (context != nullptr) { + context->ReportError(context, + "depth multiplier %d is incompatible with " + "number of output channels %d in node #%d", + params->depth_multiplier, output_channels, + node_index); + } + return kTfLiteError; + } + + if (params->dilation_width_factor <= 0) { + if (context != nullptr) { + context->ReportError(context, + "invalid dilation width factor %d in node #%d", + params->dilation_width_factor, node_index); + } + return kTfLiteError; + } + if (params->dilation_height_factor <= 0) { + if (context != nullptr) { + context->ReportError(context, + "invalid dilation height factor %d in node #%d", + params->dilation_height_factor, node_index); + } + return kTfLiteError; + } + + return kTfLiteOk; + } + + static TfLiteStatus CheckNumInputsAndOutputs(TfLiteContext* context, + TfLiteNode* node, + int expected_num_inputs, + int expected_num_outputs, + int node_index) { + if (node->inputs->size != expected_num_inputs) { + if (context != nullptr) { + context->ReportError( + context, "unexpected number of inputs (%d != %d) in node #%d", + node->inputs->size, expected_num_inputs, node_index); + } + return kTfLiteError; + } + if (node->outputs->size != expected_num_outputs) { + if (context != nullptr) { + context->ReportError( + context, "unexpected number of output (%d != %d) in node 
#%d", + node->outputs->size, expected_num_outputs, node_index); + } + return kTfLiteError; + } + return kTfLiteOk; + } + + static TfLiteStatus CheckTensorFloatType(TfLiteContext* context, + const TfLiteTensor& tensor, + int tensor_index, int node_index) { + if (tensor.type != kTfLiteFloat32) { + if (context != nullptr) { + context->ReportError( + context, "unsupported type %s in tensor #%d in node #%d", + TfLiteTypeGetName(tensor.type), tensor_index, node_index); + } + return kTfLiteError; + } + return kTfLiteOk; + } + + static TfLiteStatus CheckTensorShape(TfLiteContext* context, + const TfLiteTensor& tensor, + int expected_num_dims, + int tensor_index) { + if (tensor.dims->size != expected_num_dims) { + if (context != nullptr) { + context->ReportError( + context, + "unexpected number of shape dimensions (%d != %d) in tensor #%d", + tensor.dims->size, expected_num_dims, tensor_index); + } + return kTfLiteError; + } + for (int i = 0; i < tensor.dims->size; i++) { + if (tensor.dims->data[i] <= 0) { + context->ReportError(context, + "invalid dimension #%d (%d) in tensor #%d", i, + tensor.dims->data[i], tensor_index); + return kTfLiteError; + } + } + return kTfLiteOk; + } + + static TfLiteStatus CheckTensorNonDynamicAllocation( + TfLiteContext* context, const TfLiteTensor& tensor, int tensor_index, + int node_index) { + // TODO(b/149120844): remove checks once dynamic tensors are supported + if (tensor.allocation_type == kTfLiteDynamic) { + if (context != nullptr) { + context->ReportError( + context, + "invalid allocation type in tensor #%d in node #%d: " + "expected non-dynamic tensor", + tensor_index, node_index); + } + return kTfLiteError; + } + return kTfLiteOk; + } + + static TfLiteStatus CheckTensorStaticAllocation(TfLiteContext* context, + const TfLiteTensor& tensor, + int tensor_index, + int node_index) { + if (tensor.allocation_type != kTfLiteMmapRo || + tensor.data.raw_const == nullptr) { + if (context != nullptr) { + context->ReportError( + context, + 
"invalid allocation type in tensor #%d in node #%d: " + "expected static read-only tensor", + tensor_index, node_index); + } + return kTfLiteError; + } + return kTfLiteOk; + } + + static TfLiteStatus VisitNode(xnn_subgraph_t subgraph, TfLiteContext* context, + TfLiteRegistration* registration, + TfLiteNode* node, int node_index, + const std::vector& xnnpack_tensors) { + // TFLite context used for logging purposes. When we create a new node + // (subgraph is non-null), logging context is the same as context, and error + // messages are passed to TFLite. When we detect supported operations + // (subgraph is null), logging context is null, and error messages are + // supressed. + TfLiteContext* logging_context = subgraph == nullptr ? nullptr : context; + switch (registration->builtin_code) { + case kTfLiteBuiltinConv2d: { + const TfLiteConvParams* conv_params = + static_cast(node->builtin_data); + + return VisitConv2DNode(subgraph, logging_context, node_index, node, + context->tensors, conv_params, xnnpack_tensors); + } + case kTfLiteBuiltinDepthwiseConv2d: { + const TfLiteDepthwiseConvParams* dwconv_params = + static_cast(node->builtin_data); + + return VisitDepthwiseConv2DNode(subgraph, logging_context, node_index, + node, context->tensors, dwconv_params, + xnnpack_tensors); + } + default: + return kTfLiteError; + } + } + + static TfLiteStatus VisitConv2DNode( + xnn_subgraph_t subgraph, TfLiteContext* logging_context, int node_index, + TfLiteNode* node, const TfLiteTensor* tensors, + const TfLiteConvParams* conv_params, + const std::vector& xnnpack_tensors) { + TF_LITE_ENSURE_STATUS( + CheckConvolutionParams(logging_context, conv_params, node_index)); + + TF_LITE_ENSURE_STATUS( + CheckNumInputsAndOutputs(logging_context, node, 3, 1, node_index)); + + const TfLiteTensor& input_tensor = tensors[node->inputs->data[0]]; + + TF_LITE_ENSURE_STATUS(CheckTensorFloatType( + logging_context, input_tensor, node->inputs->data[0], node_index)); + + 
TF_LITE_ENSURE_STATUS(CheckTensorShape(logging_context, input_tensor, 4, + node->inputs->data[0])); + + TF_LITE_ENSURE_STATUS(CheckTensorNonDynamicAllocation( + logging_context, input_tensor, node->inputs->data[0], node_index)); + + const TfLiteTensor& filter_tensor = tensors[node->inputs->data[1]]; + + TF_LITE_ENSURE_STATUS(CheckTensorFloatType( + logging_context, filter_tensor, node->inputs->data[1], node_index)); + + TF_LITE_ENSURE_STATUS(CheckTensorShape(logging_context, filter_tensor, 4, + node->inputs->data[1])); + + TF_LITE_ENSURE_STATUS(CheckTensorStaticAllocation( + logging_context, filter_tensor, node->inputs->data[1], node_index)); + + const TfLiteTensor& bias_tensor = tensors[node->inputs->data[2]]; + + TF_LITE_ENSURE_STATUS(CheckTensorFloatType( + logging_context, filter_tensor, node->inputs->data[2], node_index)); + + TF_LITE_ENSURE_STATUS(CheckTensorShape(logging_context, bias_tensor, 1, + node->inputs->data[2])); + + TF_LITE_ENSURE_STATUS(CheckTensorStaticAllocation( + logging_context, bias_tensor, node->inputs->data[2], node_index)); + + const TfLiteTensor& output_tensor = tensors[node->outputs->data[0]]; + + TF_LITE_ENSURE_STATUS(CheckTensorFloatType( + logging_context, output_tensor, node->outputs->data[0], node_index)); + + TF_LITE_ENSURE_STATUS(CheckTensorShape(logging_context, output_tensor, 4, + node->outputs->data[0])); + + TF_LITE_ENSURE_STATUS(CheckTensorNonDynamicAllocation( + logging_context, output_tensor, node->outputs->data[0], node_index)); + + const int output_channels = filter_tensor.dims->data[0]; + const int kernel_height = filter_tensor.dims->data[1]; + const int kernel_width = filter_tensor.dims->data[2]; + const int input_channels = filter_tensor.dims->data[3]; + + uint32_t flags; + TF_LITE_ENSURE_STATUS(CalculatePadding( + logging_context, conv_params->padding, &flags, node_index)); + + float output_min = -std::numeric_limits::infinity(); + float output_max = +std::numeric_limits::infinity(); + 
TF_LITE_ENSURE_STATUS(ConvertActivationToOutputRange( + logging_context, node_index, conv_params->activation, &output_min, + &output_max)); + + if (subgraph != nullptr) { + const xnn_status status = xnn_define_convolution_2d( + subgraph, + /*input_padding_top=*/0, + /*input_padding_right=*/0, + /*input_padding_bottom=*/0, + /*input_padding_left=*/0, static_cast(kernel_height), + static_cast(kernel_width), + static_cast(conv_params->stride_height), + static_cast(conv_params->stride_width), + static_cast(conv_params->dilation_height_factor), + static_cast(conv_params->dilation_width_factor), + /*groups=*/1, static_cast(input_channels), + static_cast(output_channels), output_min, output_max, + /*input_id=*/xnnpack_tensors[node->inputs->data[0]], + /*filter_id=*/xnnpack_tensors[node->inputs->data[1]], + /*bias_id=*/xnnpack_tensors[node->inputs->data[2]], + /*output_id=*/xnnpack_tensors[node->outputs->data[0]], flags); + if (status != xnn_status_success) { + logging_context->ReportError( + logging_context, "failed to delegate Convolution 2D node #%d", + node_index); + return kTfLiteError; + } + } + + return kTfLiteOk; + } + + static TfLiteStatus VisitDepthwiseConv2DNode( + xnn_subgraph_t subgraph, TfLiteContext* logging_context, int node_index, + TfLiteNode* node, const TfLiteTensor* tensors, + const TfLiteDepthwiseConvParams* dwconv_params, + const std::vector& xnnpack_tensors) { + TF_LITE_ENSURE_STATUS( + CheckNumInputsAndOutputs(logging_context, node, 3, 1, node_index)); + + const TfLiteTensor& input_tensor = tensors[node->inputs->data[0]]; + + TF_LITE_ENSURE_STATUS(CheckTensorFloatType( + logging_context, input_tensor, node->inputs->data[0], node_index)); + + TF_LITE_ENSURE_STATUS(CheckTensorShape(logging_context, input_tensor, 4, + node->inputs->data[0])); + + TF_LITE_ENSURE_STATUS(CheckTensorNonDynamicAllocation( + logging_context, input_tensor, node->inputs->data[0], node_index)); + + const TfLiteTensor& filter_tensor = tensors[node->inputs->data[1]]; + + 
TF_LITE_ENSURE_STATUS(CheckTensorFloatType( + logging_context, filter_tensor, node->inputs->data[1], node_index)); + + TF_LITE_ENSURE_STATUS(CheckTensorShape(logging_context, filter_tensor, 4, + node->inputs->data[1])); + + TF_LITE_ENSURE_STATUS(CheckTensorStaticAllocation( + logging_context, filter_tensor, node->inputs->data[1], node_index)); + + const TfLiteTensor& bias_tensor = tensors[node->inputs->data[2]]; + + TF_LITE_ENSURE_STATUS(CheckTensorFloatType( + logging_context, filter_tensor, node->inputs->data[2], node_index)); + + TF_LITE_ENSURE_STATUS(CheckTensorShape(logging_context, bias_tensor, 1, + node->inputs->data[2])); + + TF_LITE_ENSURE_STATUS(CheckTensorStaticAllocation( + logging_context, bias_tensor, node->inputs->data[2], node_index)); + + const TfLiteTensor& output_tensor = tensors[node->outputs->data[0]]; + + TF_LITE_ENSURE_STATUS(CheckTensorFloatType( + logging_context, output_tensor, node->outputs->data[0], node_index)); + + TF_LITE_ENSURE_STATUS(CheckTensorShape(logging_context, output_tensor, 4, + node->outputs->data[0])); + + TF_LITE_ENSURE_STATUS(CheckTensorNonDynamicAllocation( + logging_context, output_tensor, node->outputs->data[0], node_index)); + + const int kernel_height = filter_tensor.dims->data[1]; + const int kernel_width = filter_tensor.dims->data[2]; + const int output_channels = filter_tensor.dims->data[3]; + + TF_LITE_ENSURE_STATUS(CheckDepthwiseConvolutionParams( + logging_context, dwconv_params, output_channels, node_index)); + + uint32_t flags = 0; + TF_LITE_ENSURE_STATUS(CalculatePadding( + logging_context, dwconv_params->padding, &flags, node_index)); + + float output_min = -std::numeric_limits::infinity(); + float output_max = +std::numeric_limits::infinity(); + TF_LITE_ENSURE_STATUS(ConvertActivationToOutputRange( + logging_context, node_index, dwconv_params->activation, &output_min, + &output_max)); + + if (subgraph != nullptr) { + const xnn_status status = xnn_define_depthwise_convolution_2d( + subgraph, + 
/*input_padding_top=*/0, + /*input_padding_right=*/0, + /*input_padding_bottom=*/0, + /*input_padding_left=*/0, static_cast(kernel_height), + static_cast(kernel_width), + static_cast(dwconv_params->stride_height), + static_cast(dwconv_params->stride_width), + static_cast(dwconv_params->dilation_height_factor), + static_cast(dwconv_params->dilation_width_factor), + static_cast(dwconv_params->depth_multiplier), + /*input_channels=*/ + static_cast(output_channels / + dwconv_params->depth_multiplier), + output_min, output_max, + /*input_id=*/xnnpack_tensors[node->inputs->data[0]], + /*filter_id=*/xnnpack_tensors[node->inputs->data[1]], + /*bias_id=*/xnnpack_tensors[node->inputs->data[2]], + /*output_id=*/xnnpack_tensors[node->outputs->data[0]], flags); + if (status != xnn_status_success) { + logging_context->ReportError( + logging_context, + "failed to delegate Depthwise Convolution 2D node #%d", node_index); + return kTfLiteError; + } + } + + return kTfLiteOk; + } + + private: + Subgraph(xnn_runtime_t runtime, std::unordered_set&& externals) + : runtime_(runtime, &xnn_delete_runtime), externals_(externals) {} + + // XNNPACK Runtime (subgraph + workspace) with smart-pointer for lifetime + // management. + std::unique_ptr runtime_{ + nullptr, &xnn_delete_runtime}; + // TFLite Tensor IDs == XNNPACK Value IDs of input/output tensors for the + // delegated subgraph. 
+ std::unordered_set externals_; + bool first_run_{true}; +}; + +TfLiteIntArray* GetOpsToReplace(TfLiteContext* context) { + TfLiteIntArray* execution_plan = nullptr; + if (context->GetExecutionPlan(context, &execution_plan) != kTfLiteOk) { + context->ReportError(context, "Unable to get graph execution plan."); + return nullptr; + } + + TfLiteIntArray* nodes_to_replace = TfLiteIntArrayCreate(execution_plan->size); + nodes_to_replace->size = 0; + for (int i = 0; i < execution_plan->size; ++i) { + const int node_index = execution_plan->data[i]; + + // Check if TFLite nodes can be delegated to XNNPACK + TfLiteNode* node = nullptr; + TfLiteRegistration* registration = nullptr; + if (context->GetNodeAndRegistration(context, node_index, &node, + ®istration) != kTfLiteOk) { + context->ReportError(context, + "Unable to get node and registration for node %d.", + node_index); + continue; // Soft error (skip this node). + } + + if (Subgraph::VisitNode(/*subgraph=*/nullptr, context, registration, node, + node_index, std::vector()) != kTfLiteOk) { + // Non-delegatable node is not an error. 
+ continue; + } + + nodes_to_replace->data[nodes_to_replace->size++] = node_index; + } + return nodes_to_replace; +} + +void* SubgraphInit(TfLiteContext* context, const char* buffer, size_t length) { + const TfLiteDelegateParams* params = + reinterpret_cast(buffer); + + return static_cast(Subgraph::Create(context, params)); +} + +TfLiteStatus SubgraphPrepare(TfLiteContext* context, TfLiteNode* node) { + return static_cast(node->user_data)->Prepare(context); +} + +TfLiteStatus SubgraphInvoke(TfLiteContext* context, TfLiteNode* node) { + return static_cast(node->user_data)->Invoke(context); +} + +void SubgraphFree(TfLiteContext* context, void* buffer) { + if (buffer != nullptr) { + delete static_cast(buffer); + } +} + +const TfLiteRegistration kSubgraphRegistration = { + /*.init=*/SubgraphInit, + /*.free=*/SubgraphFree, + /*.prepare=*/SubgraphPrepare, + /*.invoke=*/SubgraphInvoke, + /*.profiling_string=*/nullptr, + /*.builtin_code=*/0, + /*.custom_name=*/"TfLiteXNNPackDelegate", + /*.version=*/2, +}; + +TfLiteStatus DelegatePrepare(TfLiteContext* context, TfLiteDelegate* delegate) { + TfLiteIntArray* ops_to_replace = GetOpsToReplace(context); + const TfLiteStatus status = context->ReplaceNodeSubsetsWithDelegateKernels( + context, kSubgraphRegistration, ops_to_replace, delegate); + TfLiteIntArrayFree(ops_to_replace); + return status; +} + +} // namespace +} // namespace xnnpack +} // namespace tflite + +TfLiteXNNPackDelegateOptions TfLiteXNNPackDelegateOptionsDefault() { + TfLiteXNNPackDelegateOptions options = {0}; + return options; +} + +TfLiteDelegate* TfLiteXNNPackDelegateCreate( + const TfLiteXNNPackDelegateOptions* options) { + xnn_status status = xnn_initialize(/*allocator=*/nullptr); + if (status != xnn_status_success) { + return nullptr; + } + + auto* xnnpack_delegate = new ::tflite::xnnpack::Delegate(options); + return xnnpack_delegate ? 
xnnpack_delegate->tflite_delegate() : nullptr; +} + +void TfLiteXNNPackDelegateDelete(TfLiteDelegate* delegate) { + if (delegate != nullptr) { + delete reinterpret_cast<::tflite::xnnpack::Delegate*>(delegate); + } +} diff --git a/tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h b/tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h new file mode 100644 index 00000000000000..983a22a979db70 --- /dev/null +++ b/tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h @@ -0,0 +1,47 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_LITE_DELEGATES_XNNPACK_XNNPACK_DELEGATE_H_ +#define TENSORFLOW_LITE_DELEGATES_XNNPACK_XNNPACK_DELEGATE_H_ + +#include "tensorflow/lite/c/common.h" + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +typedef struct { + // Number of threads to use in the thread pool. + // 0 or negative value means no thread pool used. + int32_t num_threads; +} TfLiteXNNPackDelegateOptions; + +// Returns a structure with the default XNNPack delegate options. +TfLiteXNNPackDelegateOptions TfLiteXNNPackDelegateOptionsDefault(); + +// Creates a new delegate instance that need to be destroyed with +// `TfLiteXNNPackDelegateDelete` when delegate is no longer used by TFLite. 
+// When `options` is set to `nullptr`, the following default values are used: +TfLiteDelegate* TfLiteXNNPackDelegateCreate( + const TfLiteXNNPackDelegateOptions* options); + +// Destroys a delegate created with `TfLiteXNNPackDelegateCreate` call. +void TfLiteXNNPackDelegateDelete(TfLiteDelegate* delegate); + +#ifdef __cplusplus +} +#endif // __cplusplus + +#endif // TENSORFLOW_LITE_DELEGATES_XNNPACK_XNNPACK_DELEGATE_H_ diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 77e605fe76a6aa..b0c4545789d30a 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -127,6 +127,16 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""): print("path_prefix was specified to tf_workspace but is no longer used " + "and will be removed in the future.") + tf_http_archive( + name = "XNNPACK", + sha256 = "2894f92427a8b1314a3db5ac7087feb9097144f29ed726c885c1a87be3b08968", + strip_prefix = "XNNPACK-d6f77a69c28d9c99ae5226e3327ec87e1994e81d", + urls = [ + "https://storage.googleapis.com/mirror.tensorflow.org/github.com/google/XNNPACK/archive/d6f77a69c28d9c99ae5226e3327ec87e1994e81d.zip", + "https://github.com/google/XNNPACK/archive/d6f77a69c28d9c99ae5226e3327ec87e1994e81d.zip", + ], + ) + # Important: If you are upgrading MKL-DNN, then update the version numbers # in third_party/mkl_dnn/mkldnn.BUILD. In addition, the new version of # MKL-DNN might require upgrading MKL ML libraries also. 
If they need to be diff --git a/third_party/clog/BUILD.bazel b/third_party/clog/BUILD.bazel new file mode 100644 index 00000000000000..6431f980d97917 --- /dev/null +++ b/third_party/clog/BUILD.bazel @@ -0,0 +1,36 @@ +# Description: +# C-style (a-la printf) logging library + +package(default_visibility = ["//visibility:public"]) + +licenses(["notice"]) + +exports_files(["LICENSE"]) + +cc_library( + name = "clog", + srcs = [ + "deps/clog/src/clog.c", + ], + hdrs = [ + "deps/clog/include/clog.h", + ], + copts = [ + "-Wno-unused-result", + ], + linkopts = select({ + ":android": [ + "-llog", + ], + "//conditions:default": [ + ], + }), + linkstatic = True, + strip_include_prefix = "deps/clog/include", +) + +config_setting( + name = "android", + values = {"crosstool_top": "//external:android/crosstool"}, + visibility = ["//visibility:public"], +) diff --git a/third_party/cpuinfo/BUILD.bazel b/third_party/cpuinfo/BUILD.bazel new file mode 100644 index 00000000000000..cbdbd034004372 --- /dev/null +++ b/third_party/cpuinfo/BUILD.bazel @@ -0,0 +1,223 @@ +# cpuinfo, a library to detect information about the host CPU +package(default_visibility = ["//visibility:public"]) + +licenses(["notice"]) + +exports_files(["LICENSE"]) + +C99OPTS = [ + "-std=gnu99", # gnu99, not c99, because dprintf is used + "-Wno-vla", + "-D_GNU_SOURCE=1", # to use CPU_SETSIZE + "-DCPUINFO_INTERNAL=", + "-DCPUINFO_PRIVATE=", +] + +# Source code common to all platforms. +COMMON_SRCS = [ + "src/api.c", + "src/cache.c", + "src/init.c", +] + +# Architecture-specific sources and headers. 
+X86_SRCS = [ + "src/x86/cache/descriptor.c", + "src/x86/cache/deterministic.c", + "src/x86/cache/init.c", + "src/x86/info.c", + "src/x86/init.c", + "src/x86/isa.c", + "src/x86/name.c", + "src/x86/topology.c", + "src/x86/uarch.c", + "src/x86/vendor.c", +] + +ARM_SRCS = [ + "src/arm/cache.c", + "src/arm/uarch.c", +] + +# Platform-specific sources and headers +LINUX_SRCS = [ + "src/linux/cpulist.c", + "src/linux/current.c", + "src/linux/multiline.c", + "src/linux/processors.c", + "src/linux/smallfile.c", +] + +MOCK_LINUX_SRCS = [ + "src/linux/mockfile.c", +] + +MACH_SRCS = [ + "src/mach/topology.c", +] + +EMSCRIPTEN_SRCS = [ + "src/emscripten/init.c", +] + +PNACL_SRCS = [ + "src/pnacl/init.c", +] + +LINUX_X86_SRCS = [ + "src/x86/linux/cpuinfo.c", + "src/x86/linux/init.c", +] + +LINUX_ARM_SRCS = [ + "src/arm/linux/chipset.c", + "src/arm/linux/clusters.c", + "src/arm/linux/cpuinfo.c", + "src/arm/linux/hwcap.c", + "src/arm/linux/init.c", + "src/arm/linux/midr.c", +] + +LINUX_ARM32_SRCS = LINUX_ARM_SRCS + ["src/arm/linux/aarch32-isa.c"] + +LINUX_ARM64_SRCS = LINUX_ARM_SRCS + ["src/arm/linux/aarch64-isa.c"] + +ANDROID_ARM_SRCS = [ + "src/arm/android/properties.c", +] + +WINDOWS_X86_SRCS = [ + "src/x86/windows/init.c", +] + +MACH_X86_SRCS = [ + "src/x86/mach/init.c", +] + +MACH_ARM_SRCS = [ + "src/arm/mach/init.c", +] + +cc_library( + name = "cpuinfo_impl", + srcs = select({ + ":linux_x86_64": COMMON_SRCS + X86_SRCS + LINUX_SRCS + LINUX_X86_SRCS, + ":macos_x86_64": COMMON_SRCS + X86_SRCS + MACH_SRCS + MACH_X86_SRCS, + ":android_armv7": COMMON_SRCS + ARM_SRCS + LINUX_SRCS + LINUX_ARM32_SRCS + ANDROID_ARM_SRCS, + ":android_arm64": COMMON_SRCS + ARM_SRCS + LINUX_SRCS + LINUX_ARM64_SRCS + ANDROID_ARM_SRCS, + ":android_x86": COMMON_SRCS + X86_SRCS + LINUX_SRCS + LINUX_X86_SRCS, + ":android_x86_64": COMMON_SRCS + X86_SRCS + LINUX_SRCS + LINUX_X86_SRCS, + ":emscripten_wasm": COMMON_SRCS + EMSCRIPTEN_SRCS, + }), + copts = C99OPTS + [ + "-Iexternal/cpuinfo/include", + 
"-Iexternal/cpuinfo/src", + ], + linkstatic = True, + # Headers must be in textual_hdrs to allow us to set the standard to C99 + textual_hdrs = [ + "include/cpuinfo.h", + "src/linux/api.h", + "src/mach/api.h", + "src/cpuinfo/common.h", + "src/cpuinfo/internal-api.h", + "src/cpuinfo/log.h", + "src/cpuinfo/utils.h", + "src/x86/api.h", + "src/x86/cpuid.h", + "src/x86/linux/api.h", + "src/arm/android/api.h", + "src/arm/linux/api.h", + "src/arm/linux/cp.h", + "src/arm/api.h", + "src/arm/midr.h", + ], + deps = [ + "@clog", + ], +) + +cc_library( + name = "cpuinfo", + hdrs = [ + "include/cpuinfo.h", + ], + strip_include_prefix = "include", + deps = [ + ":cpuinfo_impl", + ], +) + +############################# Build configurations ############################# + +config_setting( + name = "linux_x86_64", + values = {"cpu": "k8"}, + visibility = ["//visibility:public"], +) + +config_setting( + name = "macos_x86_64", + values = { + "apple_platform_type": "macos", + "cpu": "darwin", + }, +) + +config_setting( + name = "android_armv7", + values = { + "crosstool_top": "//external:android/crosstool", + "cpu": "armeabi-v7a", + }, + visibility = ["//visibility:public"], +) + +config_setting( + name = "android_arm64", + values = { + "crosstool_top": "//external:android/crosstool", + "cpu": "arm64-v8a", + }, + visibility = ["//visibility:public"], +) + +config_setting( + name = "android_x86", + values = { + "crosstool_top": "//external:android/crosstool", + "cpu": "x86", + }, + visibility = ["//visibility:public"], +) + +config_setting( + name = "android_x86_64", + values = { + "crosstool_top": "//external:android/crosstool", + "cpu": "x86_64", + }, + visibility = ["//visibility:public"], +) + +config_setting( + name = "emscripten_wasm", + values = { + "cpu": "wasm", + }, +) + +config_setting( + name = "emscripten_wasmsimd", + values = { + "cpu": "wasm", + "features": "wasm_simd", + }, +) + +config_setting( + name = "emscripten_asmjs", + values = { + "cpu": "asmjs", + }, +) diff 
--git a/third_party/cpuinfo/workspace.bzl b/third_party/cpuinfo/workspace.bzl new file mode 100644 index 00000000000000..c2eeede8a0d7c0 --- /dev/null +++ b/third_party/cpuinfo/workspace.bzl @@ -0,0 +1,15 @@ +"""Loads the cpuinfo library, used by XNNPACK.""" + +load("//third_party:repo.bzl", "third_party_http_archive") + +def repo(): + third_party_http_archive( + name = "cpuinfo", + strip_prefix = "cpuinfo-e39a5790059b6b8274ed91f7b5b5b13641dff267", + sha256 = "e5caa8b7c58f1623eed88f4d5147e3753ff19cde821526bc9aa551b004f751fe", + urls = [ + "https://storage.googleapis.com/mirror.tensorflow.org/github.com/pytorch/cpuinfo/archive/e39a5790059b6b8274ed91f7b5b5b13641dff267.tar.gz", + "https://github.com/pytorch/cpuinfo/archive/e39a5790059b6b8274ed91f7b5b5b13641dff267.tar.gz", + ], + build_file = "//third_party/cpuinfo:BUILD.bazel", + ) diff --git a/third_party/psimd/workspace.bzl b/third_party/psimd/workspace.bzl new file mode 100644 index 00000000000000..ca0bca77d17a18 --- /dev/null +++ b/third_party/psimd/workspace.bzl @@ -0,0 +1,15 @@ +"""Loads the psimd library, used by XNNPACK.""" + +load("//third_party:repo.bzl", "third_party_http_archive") + +def repo(): + third_party_http_archive( + name = "psimd", + strip_prefix = "psimd-10b4ffc6ea9e2e11668f86969586f88bc82aaefa", + sha256 = "1fefd66702cb2eb3462b962f33d4fb23d59a55d5889ee6372469d286c4512df4", + urls = [ + "https://storage.googleapis.com/mirror.tensorflow.org/github.com/Maratyszcza/psimd/archive/10b4ffc6ea9e2e11668f86969586f88bc82aaefa.tar.gz", + "https://github.com/Maratyszcza/psimd/archive/10b4ffc6ea9e2e11668f86969586f88bc82aaefa.tar.gz", + ], + build_file = "//third_party/psimd:BUILD.bazel", + )