Skip to content

Commit 94a7c01

Browse files
yanbing-j authored and pytorchmergebot committed
Enable oneDNN implementation in LSTM op (pytorch#91158)
### Description

This PR enables the oneDNN implementation in the LSTM op to improve its performance. Both FP32 and BF16 are supported.

### Performance improvement

On CPX (28 cores), with iomp and jemalloc enabled, we chose 8 LSTM input configurations (covering input_size, hidden_size, num_layers, bidirectional, bias, batch_first, dropout, batch_size, and seq_len); the final configuration is a real input from train-clean-100 in the LibriSpeech dataset. The performance improvements are shown in the following figures: the LSTM with the oneDNN implementation outperforms the original one.

In a single socket:
![image](https://user-images.githubusercontent.com/61222868/211182994-833debec-518a-4b35-8504-6b0fadb17930.png)
![image](https://user-images.githubusercontent.com/61222868/211183012-31e1253f-2c60-4c92-a656-c239a971b453.png)

In a single core:
![image](https://user-images.githubusercontent.com/61222868/211183017-186e5d47-cb9a-4c1e-914f-fa718e769f1c.png)
![image](https://user-images.githubusercontent.com/61222868/211183022-53266857-5a9e-4a95-b300-33fa34811d08.png)

Pull Request resolved: pytorch#91158
Approved by: https://github.com/jgong5, https://github.com/malfet
1 parent a41f00e commit 94a7c01

11 files changed

+804
-1
lines changed

aten/src/ATen/autocast_mode.cpp

+1
Original file line numberDiff line numberDiff line change
@@ -506,6 +506,7 @@ TORCH_LIBRARY_IMPL(aten, AutocastCPU, m) {
506506
KERNEL_CPU2(_convolution, deprecated, lower_precision_fp)
507507
KERNEL_CPU(matmul, lower_precision_fp)
508508
KERNEL_CPU(conv_tbc, lower_precision_fp)
509+
KERNEL_CPU(mkldnn_rnn_layer, lower_precision_fp)
509510

510511
// fp32 cast policy
511512
KERNEL_CPU(conv_transpose1d, fp32)

aten/src/ATen/native/RNN.cpp

+30-1
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include <c10/util/irange.h>
1212
#include <torch/custom_class.h>
1313
#include <torch/library.h>
14+
#include <ATen/Config.h>
1415

1516
#ifndef AT_PER_OPERATOR_HEADERS
1617
#include <ATen/Functions.h>
@@ -50,7 +51,6 @@
5051
#include <ATen/ops/tanh_backward.h>
5152
#include <ATen/ops/zeros_like.h>
5253
#include <ATen/ops/zeros_like_ops.h>
53-
5454
#include <utility>
5555
#endif
5656

@@ -69,6 +69,17 @@ bool use_miopen(const at::Tensor& input, const double dropout_state) {
6969
return is_miopen_acceptable;
7070
}
7171

72+
// Decide whether the oneDNN (MKL-DNN) LSTM path may be used for `input`.
// Requires a oneDNN-enabled build, the user toggle (torch.backends.mkldnn)
// left on, a CPU-backend tensor, and a float32 or bfloat16 dtype.
bool use_mkldnn(const Tensor& input) {
#if AT_MKLDNN_ENABLED()
  if (at::globalContext().userEnabledMkldnn()) {
    const bool on_cpu = input.options().backend() == at::Backend::CPU;
    const auto dtype = input.scalar_type();
    // Only FP32 and BF16 LSTM are supported by the oneDNN kernel.
    return on_cpu && (dtype == kFloat || dtype == kBFloat16);
  }
#endif
  return false;
}
82+
7283
template<typename T>
7384
using pair_of = std::pair<T, T>;
7485

@@ -1409,6 +1420,7 @@ DEFINE_DISPATCH(lstm_cudnn_stub);
14091420
DEFINE_DISPATCH(lstm_packed_cudnn_stub);
14101421
DEFINE_DISPATCH(lstm_miopen_stub);
14111422
DEFINE_DISPATCH(lstm_packed_miopen_stub);
1423+
DEFINE_DISPATCH(lstm_mkldnn_stub);
14121424
REGISTER_NO_CPU_DISPATCH(lstm_cudnn_stub);
14131425
REGISTER_NO_CPU_DISPATCH(lstm_packed_cudnn_stub);
14141426
REGISTER_NO_CPU_DISPATCH(lstm_miopen_stub);
@@ -1447,6 +1459,23 @@ std::tuple<Tensor, Tensor, Tensor> lstm(
14471459
}
14481460
}
14491461

1462+
if (use_mkldnn(_input)) {
1463+
if (!has_projections) {
1464+
if (hx[0].unsafeGetTensorImpl()->has_symbolic_sizes_strides()) {
1465+
TORCH_WARN_ONCE(
1466+
"LSTM with symbolic sizes and strides is not supported with oneDNN. Using default implementation.");
1467+
} else {
1468+
Tensor output, hy, cy;
1469+
lstm_mkldnn_stub(_input.device().type(), output, hy, cy,_input, hx, _params, has_biases,
1470+
num_layers, dropout_p, train, bidirectional, batch_first);
1471+
return std::make_tuple(std::move(output), std::move(hy), std::move(cy));
1472+
}
1473+
} else {
1474+
TORCH_WARN_ONCE(
1475+
"LSTM with projections is not supported with oneDNN. Using default implementation.");
1476+
}
1477+
}
1478+
14501479
check_attributes(_input, _params, hx);
14511480
auto input = batch_first ? _input.transpose(0, 1) : _input;
14521481
auto params = gather_params(_params, has_biases, has_projections);

aten/src/ATen/native/RNN.h

+1
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ using rnn_packed_fn = void(*)(Tensor&, Tensor&, const Tensor&, const Tensor&, co
1212

1313
DECLARE_DISPATCH(lstm_fn, lstm_cudnn_stub);
1414
DECLARE_DISPATCH(lstm_fn, lstm_miopen_stub);
15+
DECLARE_DISPATCH(lstm_fn, lstm_mkldnn_stub);
1516
DECLARE_DISPATCH(rnn_fn, gru_cudnn_stub);
1617
DECLARE_DISPATCH(rnn_fn, gru_miopen_stub);
1718
DECLARE_DISPATCH(rnn_fn, rnn_tanh_cudnn_stub);

aten/src/ATen/native/mkldnn/MKLDNNCommon.h

+3
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@ namespace at { namespace native {
1010

1111
// Mapping ScalarType to ideep tensor data_type
1212
TORCH_API ideep::tensor::data_type get_mkldnn_dtype(ScalarType type);
13+
// Convenience overload: map a Tensor's scalar type directly to the
// corresponding ideep tensor data type.
static inline ideep::tensor::data_type get_mkldnn_dtype(const Tensor& tensor) {
  return get_mkldnn_dtype(tensor.scalar_type());
}
1316

1417
// Construct aten MKL-DNN tensor given an ideep tensor
1518
TORCH_API Tensor new_with_itensor_mkldnn(ideep::tensor&& it, c10::optional<ScalarType> dtype, c10::optional<Device> device);

0 commit comments

Comments
 (0)