An ugly hack to compile on Tegra X1 with the JetPack 2.3.1 release.
* Applied the patch from https://github.com/jetsonhacks/installTensorFlowTX1
* Reverted Eigen to the revision used in TensorFlow r0.11 to avoid a CUDA compile error
* Removed the expm1 op, which was added alongside the newer Eigen changes
rwightman committed Jan 12, 2017
1 parent cb881e0 commit a1cde1d
Showing 16 changed files with 53 additions and 130 deletions.
2 changes: 2 additions & 0 deletions tensorflow/cc/gradients/math_grad.cc
@@ -105,6 +105,7 @@ Status ExpGrad(const Scope& scope, const Operation& op,
}
REGISTER_GRADIENT_OP("Exp", ExpGrad);

#if 0
Status Expm1Grad(const Scope& scope, const Operation& op,
const std::vector<Output>& grad_inputs,
std::vector<Output>* grad_outputs) {
@@ -115,6 +116,7 @@ Status Expm1Grad(const Scope& scope, const Operation& op,
return scope.status();
}
REGISTER_GRADIENT_OP("Expm1", Expm1Grad);
#endif

Status LogGrad(const Scope& scope, const Operation& op,
const std::vector<Output>& grad_inputs,
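For background: the gradient being disabled here is identical to Exp's, because the constant −1 in y = eˣ − 1 vanishes under differentiation, which is why Expm1Grad reuses grad * exp(x):

```latex
% Derivative of y = e^x - 1; the constant offset drops out, so the
% backpropagated gradient is the same expression ExpGrad computes.
\frac{\partial}{\partial x}\bigl(e^{x} - 1\bigr) = e^{x}
\qquad\Longrightarrow\qquad
\frac{\partial L}{\partial x} = \frac{\partial L}{\partial y}\, e^{x}
```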
10 changes: 6 additions & 4 deletions tensorflow/cc/gradients/math_grad_test.cc
@@ -42,7 +42,7 @@ class CWiseUnaryGradTest : public ::testing::Test {
SQRT,
RSQRT,
EXP,
EXPM1,
// EXPM1,
LOG,
LOG1P,
TANH,
@@ -100,9 +100,9 @@ class CWiseUnaryGradTest : public ::testing::Test {
case EXP:
y = Exp(scope_, x);
break;
case EXPM1:
y = Expm1(scope_, x);
break;
// case EXPM1:
// y = Expm1(scope_, x);
// break;
case LOG:
y = Log(scope_, x);
break;
@@ -208,6 +208,7 @@ TEST_F(CWiseUnaryGradTest, Exp) {
TestCWiseGrad(EXP, x_fn, dy_fn, dx_fn);
}

#if 0
TEST_F(CWiseUnaryGradTest, Expm1) {
auto x_fn = [this](const int i) { return RV({0, -1, 1e-6, 1, -2, 3, 100}); };
auto dy_fn = [this](const float x) { return x + RV({-2, 2, -3, 3, -4, 4}); };
@@ -216,6 +217,7 @@ TEST_F(CWiseUnaryGradTest, Expm1) {
};
TestCWiseGrad(EXPM1, x_fn, dy_fn, dx_fn);
}
#endif

TEST_F(CWiseUnaryGradTest, Log) {
auto x_fn = [this](const int i) { return RV({-1, 1, -2, 2, -3, 3, -4, 4}); };
24 changes: 0 additions & 24 deletions tensorflow/core/kernels/cwise_op_expm1.cc

This file was deleted.

26 changes: 0 additions & 26 deletions tensorflow/core/kernels/cwise_op_gpu_expm1.cu.cc

This file was deleted.
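Both deleted files held only the Expm1 kernel registrations. Roughly, following the cwise-op pattern of this TensorFlow era (a reconstruction for orientation, not the verbatim deleted source):

```cpp
// Rough reconstruction of the deleted cwise_op_expm1.cc (not verbatim):
// cwise unary ops registered their CPU kernels with a single macro call,
// with a GPU counterpart in the matching .cu.cc file.
#include "tensorflow/core/kernels/cwise_ops_common.h"

namespace tensorflow {
REGISTER5(UnaryOp, CPU, "Expm1", functor::expm1, float, Eigen::half, double,
          complex64, complex128);
}  // namespace tensorflow
```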

10 changes: 8 additions & 2 deletions tensorflow/core/kernels/cwise_op_gpu_select.cu.cc
@@ -65,8 +65,14 @@ struct BatchSelectFunctor<GPUDevice, T> {
const int all_but_batch = then_flat_outer_dims.dimension(1);

#if !defined(EIGEN_HAS_INDEX_LIST)
Eigen::array<int, 2> broadcast_dims{{ 1, all_but_batch }};
Eigen::Tensor<int, 2>::Dimensions reshape_dims{{ batch, 1 }};
// Eigen::array<int, 2> broadcast_dims{{ 1, all_but_batch }};
Eigen::array<int, 2> broadcast_dims;
broadcast_dims[0] = 1;
broadcast_dims[1] = all_but_batch;
// Eigen::Tensor<int, 2>::Dimensions reshape_dims{{ batch, 1 }};
Eigen::Tensor<int, 2>::Dimensions reshape_dims;
reshape_dims[0] = batch;
reshape_dims[1] = 1;
#else
Eigen::IndexList<Eigen::type2index<1>, int> broadcast_dims;
broadcast_dims.set(1, all_but_batch);
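The change above replaces brace initialization with element-wise assignment; the same substitution appears again in sparse_tensor_dense_matmul_op_gpu.cu.cc below. As a standalone illustration of the pattern (my sketch, assuming Eigen's unsupported Tensor module is on the include path, not code from the commit):

```cpp
// Illustration only: with the reverted (older) Eigen revision, nvcc can
// reject aggregate/brace initialization of Eigen::array in device code,
// so the patch falls back to default construction plus assignment.
#include <unsupported/Eigen/CXX11/Tensor>

int main() {
  const int batch = 4;
  const int all_but_batch = 8;

  // Form that can fail to compile under nvcc + old Eigen:
  // Eigen::array<int, 2> broadcast_dims{{1, all_but_batch}};

  // Portable fallback used throughout this patch:
  Eigen::array<int, 2> broadcast_dims;
  broadcast_dims[0] = 1;
  broadcast_dims[1] = all_but_batch;

  Eigen::Tensor<int, 2>::Dimensions reshape_dims;
  reshape_dims[0] = batch;
  reshape_dims[1] = 1;

  return (broadcast_dims[1] == all_but_batch && reshape_dims[0] == batch) ? 0 : 1;
}
```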
4 changes: 2 additions & 2 deletions tensorflow/core/kernels/cwise_ops.h
@@ -471,8 +471,8 @@ struct rsqrt : base<T, Eigen::internal::scalar_rsqrt_op<T> > {};
template <typename T>
struct exp : base<T, Eigen::internal::scalar_exp_op<T> > {};

template <typename T>
struct expm1 : base<T, Eigen::internal::scalar_expm1_op<T>> {};
//template <typename T>
//struct expm1 : base<T, Eigen::internal::scalar_expm1_op<T>> {};

template <typename T>
struct log : base<T, Eigen::internal::scalar_log_op<T>> {};
14 changes: 11 additions & 3 deletions tensorflow/core/kernels/sparse_tensor_dense_matmul_op_gpu.cu.cc
@@ -104,9 +104,17 @@ struct SparseTensorDenseMatMulFunctor<GPUDevice, T, ADJ_A, ADJ_B> {
int n = (ADJ_B) ? b.dimension(0) : b.dimension(1);

#if !defined(EIGEN_HAS_INDEX_LIST)
Eigen::Tensor<int, 2>::Dimensions matrix_1_by_nnz{{ 1, nnz }};
Eigen::array<int, 2> n_by_1{{ n, 1 }};
Eigen::array<int, 1> reduce_on_rows{{ 0 }};
// Eigen::Tensor<int, 2>::Dimensions matrix_1_by_nnz{{ 1, nnz }};
Eigen::Tensor<int, 2>::Dimensions matrix_1_by_nnz;
matrix_1_by_nnz[0] = 1;
matrix_1_by_nnz[1] = nnz;
// Eigen::array<int, 2> n_by_1{{ n, 1 }};
Eigen::array<int, 2> n_by_1;
n_by_1[0] = n;
n_by_1[1] = 1;
// Eigen::array<int, 1> reduce_on_rows{{ 0 }};
Eigen::array<int, 1> reduce_on_rows;
reduce_on_rows[0] = 0;
#else
Eigen::IndexList<Eigen::type2index<1>, int> matrix_1_by_nnz;
matrix_1_by_nnz.set(1, nnz);
24 changes: 0 additions & 24 deletions tensorflow/core/ops/compat/ops_history.v0.pbtxt
@@ -11783,30 +11783,6 @@ op {
}
}
}
op {
name: "Expm1"
input_arg {
name: "x"
type_attr: "T"
}
output_arg {
name: "y"
type_attr: "T"
}
attr {
name: "T"
type: "type"
allowed_values {
list {
type: DT_HALF
type: DT_FLOAT
type: DT_DOUBLE
type: DT_COMPLEX64
type: DT_COMPLEX128
}
}
}
}
op {
name: "ExtractGlimpse"
input_arg {
2 changes: 2 additions & 0 deletions tensorflow/core/ops/math_grad.cc
@@ -123,6 +123,7 @@ Status ExpGrad(const AttrSlice& attrs, FunctionDef* g) {
}
REGISTER_OP_GRADIENT("Exp", ExpGrad);

#if 0
Status Expm1Grad(const AttrSlice& attrs, FunctionDef* g) {
// clang-format off
return GradForUnaryCwise(g, {
@@ -132,6 +133,7 @@ Status Expm1Grad(const AttrSlice& attrs, FunctionDef* g) {
// clang-format on
}
REGISTER_OP_GRADIENT("Expm1", Expm1Grad);
#endif

Status LogGrad(const AttrSlice& attrs, FunctionDef* g) {
// clang-format off
2 changes: 2 additions & 0 deletions tensorflow/core/ops/math_ops.cc
@@ -278,10 +278,12 @@ REGISTER_OP("Exp").UNARY_COMPLEX().Doc(R"doc(
Computes exponential of x element-wise. \\(y = e^x\\).
)doc");

#if 0
REGISTER_OP("Expm1").UNARY_COMPLEX().Doc(R"doc(
Computes exponential of x - 1 element-wise.
I.e., \\(y = (\exp x) - 1\\).
)doc");
#endif

REGISTER_OP("Log").UNARY_COMPLEX().Doc(R"doc(
Computes natural logarithm of x element-wise.
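Context for what is lost by disabling this op: expm1 exists because computing exp(x) − 1 directly cancels catastrophically for small x. A small sketch of the difference (background illustration, not part of the commit):

```cpp
// expm1 vs. exp(x) - 1 near zero: exp(1e-16) rounds to exactly 1.0 in
// double precision, so the naive subtraction returns 0, while std::expm1
// returns the correct ~1e-16.
#include <cmath>
#include <cstdio>

int main() {
  const double x = 1e-16;
  std::printf("exp(x) - 1 = %.17g\n", std::exp(x) - 1.0);  // prints 0
  std::printf("expm1(x)   = %.17g\n", std::expm1(x));      // prints 1e-16
  return 0;
}
```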
26 changes: 0 additions & 26 deletions tensorflow/core/ops/ops.pbtxt
@@ -6759,32 +6759,6 @@ op {
summary: "Inserts a dimension of 1 into a tensor\'s shape."
description: "Given a tensor `input`, this operation inserts a dimension of 1 at the\ndimension index `dim` of `input`\'s shape. The dimension index `dim` starts at\nzero; if you specify a negative number for `dim` it is counted backward from\nthe end.\n\nThis operation is useful if you want to add a batch dimension to a single\nelement. For example, if you have a single image of shape `[height, width,\nchannels]`, you can make it a batch of 1 image with `expand_dims(image, 0)`,\nwhich will make the shape `[1, height, width, channels]`.\n\nOther examples:\n\n```prettyprint\n# \'t\' is a tensor of shape [2]\nshape(expand_dims(t, 0)) ==> [1, 2]\nshape(expand_dims(t, 1)) ==> [2, 1]\nshape(expand_dims(t, -1)) ==> [2, 1]\n\n# \'t2\' is a tensor of shape [2, 3, 5]\nshape(expand_dims(t2, 0)) ==> [1, 2, 3, 5]\nshape(expand_dims(t2, 2)) ==> [2, 3, 1, 5]\nshape(expand_dims(t2, 3)) ==> [2, 3, 5, 1]\n```\n\nThis operation requires that:\n\n`-1-input.dims() <= dim <= input.dims()`\n\nThis operation is related to `squeeze()`, which removes dimensions of\nsize 1."
}
op {
name: "Expm1"
input_arg {
name: "x"
type_attr: "T"
}
output_arg {
name: "y"
type_attr: "T"
}
attr {
name: "T"
type: "type"
allowed_values {
list {
type: DT_HALF
type: DT_FLOAT
type: DT_DOUBLE
type: DT_COMPLEX64
type: DT_COMPLEX128
}
}
}
summary: "Computes exponential of x - 1 element-wise."
description: "I.e., \\\\(y = (\\exp x) - 1\\\\)."
}
op {
name: "ExtractGlimpse"
input_arg {
12 changes: 6 additions & 6 deletions tensorflow/python/kernel_tests/cwise_ops_test.py
@@ -193,7 +193,7 @@ def testFloatBasic(self):
self._compareBoth(z, np.sqrt, math_ops.sqrt)
self._compareBoth(z, self._rsqrt, math_ops.rsqrt)
self._compareBoth(x, np.exp, math_ops.exp)
self._compareBoth(x, np.expm1, math_ops.expm1)
#self._compareBoth(x, np.expm1, math_ops.expm1)
self._compareBoth(z, np.log, math_ops.log)
self._compareBoth(z, np.log1p, math_ops.log1p)
self._compareBoth(x, np.tanh, math_ops.tanh)
@@ -237,7 +237,7 @@ def testFloatEmpty(self):
self._compareBoth(x, np.sqrt, math_ops.sqrt)
self._compareBoth(x, self._rsqrt, math_ops.rsqrt)
self._compareBoth(x, np.exp, math_ops.exp)
self._compareBoth(x, np.expm1, math_ops.expm1)
#self._compareBoth(x, np.expm1, math_ops.expm1)
self._compareBoth(x, np.log, math_ops.log)
self._compareBoth(x, np.log1p, math_ops.log1p)
self._compareBoth(x, np.tanh, math_ops.tanh)
@@ -277,7 +277,7 @@ def testDoubleBasic(self):
self._compareBoth(z, np.sqrt, math_ops.sqrt)
self._compareBoth(z, self._rsqrt, math_ops.rsqrt)
self._compareBoth(x, np.exp, math_ops.exp)
self._compareBoth(x, np.expm1, math_ops.expm1)
#self._compareBoth(x, np.expm1, math_ops.expm1)
self._compareBoth(z, np.log, math_ops.log)
self._compareBoth(z, np.log1p, math_ops.log1p)
self._compareBoth(x, np.tanh, math_ops.tanh)
@@ -317,7 +317,7 @@ def testHalfBasic(self):
self._compareBoth(z, np.sqrt, math_ops.sqrt)
self._compareBoth(z, self._rsqrt, math_ops.rsqrt)
self._compareBoth(x, np.exp, math_ops.exp)
self._compareBoth(x, np.expm1, math_ops.expm1)
#self._compareBoth(x, np.expm1, math_ops.expm1)
self._compareBoth(z, np.log, math_ops.log)
self._compareBoth(z, np.log1p, math_ops.log1p)
self._compareBoth(x, np.tanh, math_ops.tanh)
@@ -381,7 +381,7 @@ def testComplex64Basic(self):
self._compareCpu(y, np.sqrt, math_ops.sqrt)
self._compareCpu(y, self._rsqrt, math_ops.rsqrt)
self._compareCpu(x, np.exp, math_ops.exp)
self._compareCpu(x, np.expm1, math_ops.expm1)
#self._compareCpu(x, np.expm1, math_ops.expm1)
self._compareCpu(y, np.log, math_ops.log)
self._compareCpu(y, np.log1p, math_ops.log1p)
self._compareCpu(x, np.tanh, math_ops.tanh)
@@ -415,7 +415,7 @@ def testComplex128Basic(self):
self._compareCpu(y, np.sqrt, math_ops.sqrt)
self._compareCpu(y, self._rsqrt, math_ops.rsqrt)
self._compareCpu(x, np.exp, math_ops.exp)
self._compareCpu(x, np.expm1, math_ops.expm1)
#self._compareCpu(x, np.expm1, math_ops.expm1)
self._compareCpu(y, np.log, math_ops.log)
self._compareCpu(y, np.log1p, math_ops.log1p)
self._compareCpu(x, np.tanh, math_ops.tanh)
16 changes: 8 additions & 8 deletions tensorflow/python/ops/math_grad.py
@@ -329,14 +329,14 @@ def _ExpGrad(op, grad):
return grad * y


@ops.RegisterGradient("Expm1")
def _Expm1Grad(op, grad):
"""Returns grad * exp(x)."""
x = op.inputs[0]
with ops.control_dependencies([grad.op]):
x = math_ops.conj(x)
y = math_ops.exp(x)
return grad * y
#@ops.RegisterGradient("Expm1")
#def _Expm1Grad(op, grad):
# """Returns grad * exp(x)."""
# x = op.inputs[0]
# with ops.control_dependencies([grad.op]):
# x = math_ops.conj(x)
# y = math_ops.exp(x)
# return grad * y


@ops.RegisterGradient("Log")
2 changes: 1 addition & 1 deletion tensorflow/python/ops/math_ops.py
@@ -52,7 +52,7 @@
@@rsqrt
@@pow
@@exp
@@expm1
##@@expm1
@@log
@@log1p
@@ceil
3 changes: 3 additions & 0 deletions tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
@@ -870,6 +870,9 @@ CudaContext* CUDAExecutor::cuda_context() { return context_; }
// For anything more complicated/prod-focused than this, you'll likely want to
// turn to gsys' topology modeling.
static int TryToReadNumaNode(const string &pci_bus_id, int device_ordinal) {
// DC - make this clever later. ARM has no NUMA node, just return 0
LOG(INFO) << "ARM has no NUMA node, hardcoding to return zero";
return 0;
#if defined(__APPLE__)
LOG(INFO) << "OS X does not support NUMA - returning NUMA node zero";
return 0;
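The early return above short-circuits the sysfs-based lookup that follows it in the upstream function; on a Jetson TX1 the ARM kernel exposes no NUMA topology, so node 0 is the only sensible answer. A simplified sketch of the bypassed idea (my paraphrase using the standard Linux sysfs path, not TensorFlow's actual code):

```cpp
// Paraphrase of the bypassed logic (not TensorFlow's implementation):
// on desktop Linux, a PCI device's NUMA node can be read from sysfs;
// a missing file or a reported -1 falls back to node 0, which is also
// the right answer on Jetson/ARM where no NUMA topology exists.
#include <fstream>
#include <string>

static int ReadNumaNodeOrZero(const std::string &pci_bus_id) {
  std::ifstream f("/sys/bus/pci/devices/" + pci_bus_id + "/numa_node");
  int node = -1;
  if (f >> node && node >= 0) {
    return node;
  }
  return 0;
}

int main() { return ReadNumaNodeOrZero("0000:01:00.0"); }
```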
6 changes: 2 additions & 4 deletions tensorflow/workspace.bzl
@@ -17,11 +17,9 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
native.new_http_archive(
name = "eigen_archive",
urls = [
"http://bazel-mirror.storage.googleapis.com/bitbucket.org/eigen/eigen/get/60578b474802.tar.gz",
"https://bitbucket.org/eigen/eigen/get/60578b474802.tar.gz",
"https://bitbucket.org/eigen/eigen/get/46ee714e25d5.tar.gz",
],
sha256 = "7527cda827aff351981ebd910012e16be4d899c28a9ae7f143ae60e7f3f7b83d",
strip_prefix = "eigen-eigen-60578b474802",
strip_prefix = "eigen-eigen-46ee714e25d5",
build_file = str(Label("//third_party:eigen.BUILD")),
)

