Rnn Relu refactoring #2541

Open
wants to merge 32 commits into base: develop

Changes from 27 commits
Commits (32)
13da580
Added RNN_RELU and RNN_TANH forward training refactored functions
kikimych Jun 14, 2023
c3d8ab4
Added RNNForwardTrainingGRU method
kikimych Jul 25, 2023
263c278
Merge branch 'develop' of https://github.com/ROCmSoftwarePlatform/MIO…
kikimych Jul 26, 2023
f247d7d
Added RNN Backward Data refactor
kikimych Aug 31, 2023
7bb3be5
Added bidirection support for RNNBackwardData
kikimych Sep 15, 2023
3c889de
Unified relu and gru offset interfaces
kikimych Sep 15, 2023
cf96713
Lstm minor refactoring
kikimych Sep 27, 2023
c2bfee2
Lstm minor refactoring
kikimych Sep 27, 2023
cdce2f5
Reverted back RNNForwardTraining_MS method
kikimych Oct 27, 2023
b114b9b
Merge branch 'rnn_training_forward_refactor' of https://github.com/ki…
kikimych Oct 27, 2023
12e4a43
Rnn relu bidirectional support draft
kikimych Nov 3, 2023
d2a166b
Refactored Relu forward
kikimych Nov 9, 2023
f0d1d0a
RNNBackwardDataPackedTensorsRelu refactor
kikimych Nov 11, 2023
e8de935
Simplified RNNBackwardDataPackedTensorsRelu
kikimych Nov 15, 2023
ee524b6
Relu forward minor refactor
kikimych Nov 15, 2023
5cc2e09
Removed hidden_size from offset helpers
kikimych Nov 17, 2023
7ca8c32
Enabled RNNForwardTrainingTanhRelu by default
kikimych Nov 17, 2023
25dc932
Merge branch 'develop' into rnn_training_forward_refactor
kikimych Nov 17, 2023
e9f20d5
RNN Relu and Tanh activation functions refactoring
kikimych Nov 20, 2023
8ebdcf4
-no-hx --no-dhy --no-hy --no-dhx modes fix
kikimych Dec 1, 2023
c1ac5af
Merge branch 'develop' into relu_refactor
kikimych Dec 4, 2023
92cc941
Merge branch 'develop' into relu_refactor
kikimych Dec 11, 2023
1745012
Pull request 2541 review fixes
kikimych Dec 18, 2023
d795893
Added profiling
kikimych Dec 18, 2023
b93c0d3
Removed ocl profiling
kikimych Dec 18, 2023
87329c6
review fixes
kikimych Dec 19, 2023
7ae3116
review fixes
kikimych Dec 20, 2023
15a891f
minor fix
kikimych Dec 21, 2023
ac66ac0
Review fixes. Made RnnDirection enum class. Removed RnnOffsets class.…
kikimych Jan 11, 2024
6f25e7d
Enabled miopenRNNskip mode in RNNForwardTrainingTanhRelu
kikimych Jan 11, 2024
a145ecb
Descriptors renaming
kikimych Jan 12, 2024
ade5f3a
RNNBackwardData function renaming
kikimych Jan 27, 2024
30 changes: 30 additions & 0 deletions src/include/miopen/rnn.hpp
@@ -261,6 +261,21 @@ struct RNNDescriptor : miopenRNNDescriptor
Data_t reserveSpace,
size_t reserveSpaceSize) const;

void RNNForwardTrainingTanhRelu(Handle& handle,
std::vector<int>& seq_array,
const TensorDescriptor& xDesc,
ConstData_t x,
const TensorDescriptor& hxDesc,
ConstData_t hx,
const TensorDescriptor& wDesc,
ConstData_t w,
const TensorDescriptor& yDesc,
Data_t y,
const TensorDescriptor& hyDesc,
Data_t hy,
Data_t reserveSpace,
size_t reserveSpaceSize) const;

void RNNForwardInference(Handle& handle,
int seqLen,
c_array_view<const miopenTensorDescriptor_t> xDesc,
@@ -462,6 +477,21 @@ struct RNNDescriptor : miopenRNNDescriptor
ConstData_t reserveSpace,
size_t reserveSpaceSize) const;

void RNNBackwardDataPackedTensorsRelu(Handle& handle,
int seqLen,
c_array_view<const miopenTensorDescriptor_t> dyDesc,
ConstData_t dy,
ConstData_t dhy,
ConstData_t w,
c_array_view<const miopenTensorDescriptor_t> dxDesc,
Data_t dx,
const TensorDescriptor& dhxDesc,
Data_t dhx,
Data_t workSpace,
size_t workSpaceSize,
Data_t reserveSpace,
size_t reserveSpaceSize) const;

void RNNForwardTrainingPackedTensors(Handle& handle,
int seqLen,
c_array_view<const miopenTensorDescriptor_t> xDesc,
151 changes: 151 additions & 0 deletions src/include/miopen/rnn_util.hpp
@@ -35,6 +35,12 @@

namespace miopen {

enum rnn_direction
{
Forward = 0,
Backward = 1
};
Contributor:

It's mostly a question for @atamazov - do we use enum class or have plans to switch to enum class everywhere?

Contributor:

Let's prefer enum class. But switching to it everywhere is impossible IIRC.
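
A minimal sketch of what the scoped form could look like (the name RnnDirection is taken from the later commit "Made RnnDirection enum class"; the exact definition below is an assumption, not the merged code):

// Hypothetical enum class replacement for rnn_direction; scoped enumerators
// keep Forward/Backward out of the enclosing miopen namespace.
enum class RnnDirection : int
{
    Forward  = 0,
    Backward = 1
};

// Callers then use the qualified name, and an explicit cast where a plain
// int index is expected, e.g. static_cast<int>(RnnDirection::Backward).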


#if MIOPEN_BACKEND_HIP
inline void RNNProfilingBegin(const miopen::Handle& handle,
miopen::HipEventPtr& start,
@@ -121,6 +127,151 @@ void LSTMBackwardHiddenStateUpdate(const Handle& handle,
std::size_t dhidden_offset,
std::size_t f_offset_pre);

struct RNNWeightOffsets
{

public:
int input_offset(int layer) const;
int hidden_offset(int layer) const;
int bias_off();
int bias_off(int layer) const;

private:
int first_layer_offset() const;
};

struct ReluWeightOffsets : public RNNWeightOffsets
{
public:
ReluWeightOffsets(int input_vector_sz,
int hidden_vec_sz,
int layers_cnt,
int bias_mode,
int bi,
int nHiddenTensorsPerLayer)
: weight_stride(hidden_vec_sz * bi * nHiddenTensorsPerLayer),
in_vec_sz(input_vector_sz),
h_vec_sz(hidden_vec_sz),
num_layers(layers_cnt),
bi_scale(bi),
bias_count(bias_mode)
{
}

int input_weight_offset(int layer) const
{
return layer == 0 ? 0
: first_layer_offset() +
(h_vec_sz + h_vec_sz * bi_scale) * weight_stride * (layer - 1);
}

int hidden_weight_offset(int layer, int reverse = 0) const
{
return layer == 0 ? input_weight_offset(layer) + in_vec_sz * weight_stride +
reverse * h_vec_sz * h_vec_sz
: input_weight_offset(layer) + bi_scale * h_vec_sz * weight_stride +
reverse * h_vec_sz * h_vec_sz;
}

size_t bias_stride() const { return static_cast<size_t>(h_vec_sz) * bi_scale; }

int bias_off() const
{
return first_layer_offset() +
(h_vec_sz * bi_scale + h_vec_sz) * (num_layers - 1) * weight_stride;
}

int bias_off(int layer_id) const { return bias_off() + bias_count * layer_id * weight_stride; }
int weight_stride;

private:
const int in_vec_sz, h_vec_sz;

public:
const int num_layers;
const int bi_scale = 1;
const int bias_count = 0;

int first_layer_offset() const { return (in_vec_sz + h_vec_sz) * weight_stride; }
};
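
To make the weight-offset arithmetic above concrete, here is a small hypothetical check (it assumes this header is reachable as <miopen/rnn_util.hpp> inside the MIOpen tree; the sizes are invented): a unidirectional ReLU RNN with input size 4, hidden size 3, 2 layers, no bias, and one hidden tensor per layer.

#include <cassert>
#include <miopen/rnn_util.hpp>

int main()
{
    // input = 4, hidden = 3, layers = 2, bias_mode = 0, bi = 1, 1 hidden tensor per layer
    miopen::ReluWeightOffsets off(4, 3, 2, 0, 1, 1);

    // weight_stride = hidden_vec_sz * bi * nHiddenTensorsPerLayer = 3
    assert(off.weight_stride == 3);

    // Layer 0: the input matrix starts at 0, the hidden matrix follows its 4x3 block.
    assert(off.input_weight_offset(0) == 0);
    assert(off.hidden_weight_offset(0) == 4 * 3); // 12

    // Layer 1 starts after the whole first layer: (4 + 3) * 3 = 21 elements.
    assert(off.input_weight_offset(1) == 21);
    assert(off.hidden_weight_offset(1) == 21 + 3 * 3); // 30

    // Biases (if any) live after all matrices: 21 + (3 + 3) * (2 - 1) * 3 = 39,
    // which is exactly the number of matrix elements in this configuration.
    assert(off.bias_off() == 39);
    return 0;
}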

struct RNNOffsets
Contributor:

How is this class used and why is there no definition of its methods?

Contributor Author:

removed

Contributor:

not fixed

{
size_t layer_offset(int layer_id) const;

size_t layer_stride() const;

int gemm_write_size() const;

size_t gemm_write_stride() const;

size_t gemm_write_offset(int layer_id, int batch_id = 0, int reverse = 0) const;

size_t hidden_offset(int layer_id, int batch_id = 0, int reverse = 0) const;
};

struct ReluReserveBufferOffsets : public RNNOffsets
{
struct RBuffHelper
{
int element, save_point, batch;
size_t layer, table;
};

private:
auto Reserve_Buffer_strides(int save_point_sz, int batches_per_l, int layers_cnt) const
{
const auto element_st = 1;
const auto save_point_st = element_st * save_point_sz;
const auto batch_st = save_point_st;
const auto layer_st = static_cast<size_t>(batch_st) * batches_per_l;
const auto table_st = layers_cnt * layer_st;

return RBuffHelper{element_st, save_point_st, batch_st, layer_st, table_st};
}

public:
ReluReserveBufferOffsets(int hidden_vec_size, int layers_cnt, int batches_per_l, int bi_scale, int workspace_scale)
: hidden_size(hidden_vec_size),
batches_per_layer(batches_per_l),
save_point_size(hidden_vec_size * bi_scale * workspace_scale),
layers(layers_cnt),
strides(Reserve_Buffer_strides(save_point_size, batches_per_l, layers_cnt))
{
}

size_t layer_offset(int layer_id) const
{
return static_cast<size_t>(layer_id) * strides.layer;
}

size_t layer_stride() const { return strides.layer; }

int gemm_write_size() const { return strides.save_point; }

size_t gemm_write_stride() const { return strides.batch; }

size_t gemm_write_offset(int layer_id, int batch_id, int reverse) const
{
return layer_offset(layer_id) + static_cast<size_t>(gemm_write_stride()) * batch_id +
(size_t)reverse * hidden_size;
}

size_t hidden_offset(int layer_id, int batch_id, int reverse) const
{
return strides.table + gemm_write_offset(layer_id, batch_id, reverse);
}

private:
const int hidden_size;

public:
const int batches_per_layer;
const int save_point_size;
const int layers;
const RBuffHelper strides;
};
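
Similarly, a small hypothetical check of the reserve-buffer layout (same assumption about the include path; the sizes are invented): hidden size 3, 2 layers, 5 batch elements per layer, unidirectional, workspace_scale 1. Judging by the names, gemm_write_offset appears to address the pre-activation half of the buffer, while hidden_offset adds strides.table to reach the activated hidden states stored after it.

#include <cassert>
#include <miopen/rnn_util.hpp>

int main()
{
    // hidden = 3, layers = 2, batches_per_layer = 5, bi_scale = 1, workspace_scale = 1
    miopen::ReluReserveBufferOffsets rb(3, 2, 5, 1, 1);

    // One save point per batch element: hidden * bi_scale * workspace_scale = 3.
    assert(rb.gemm_write_size() == 3);
    assert(rb.gemm_write_stride() == 3);

    // A layer holds batches_per_layer save points: 5 * 3 = 15 elements.
    assert(rb.layer_stride() == 15);
    assert(rb.strides.table == 2 * 15); // all layers together

    // GEMM output for layer 1, batch element 2: 1 * 15 + 2 * 3 = 21 ...
    assert(rb.gemm_write_offset(1, 2, 0) == 21);
    // ... and the matching hidden-state slot sits one full table later: 30 + 21 = 51.
    assert(rb.hidden_offset(1, 2, 0) == 51);
    return 0;
}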

struct RNNTensorPaddingConverter
{
static void ConvertTensorData(const Handle& handle,