Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement L1Loss #3401

Open
wants to merge 34 commits into
base: develop
Choose a base branch
from
Open
Changes from 1 commit
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
8de41ef
add kernel for l1lossreducedforward5d
cognaiger9 May 13, 2024
0d00d6d
draft for utilities
cognaiger9 May 16, 2024
005fd3c
add 3 files in include/miopen/l1loss
cognaiger9 May 16, 2024
c3ba011
pull new driver code
cognaiger9 May 17, 2024
a508bbb
Merge branch 'develop-moreh' of github.com:ngoccoder/MIOpen into impl…
cognaiger9 May 17, 2024
de7c0e6
add driver code
cognaiger9 May 17, 2024
7e2014f
fix bug related to workspace
cognaiger9 May 21, 2024
463df2b
add driver for small sized tensor, need to investigate more
cognaiger9 May 22, 2024
415c191
add gtest script
cognaiger9 May 22, 2024
9b1c403
complete gtest cpu and gpu
cognaiger9 May 23, 2024
3bd6980
draft backward phase of l1loss
cognaiger9 May 24, 2024
256c2e0
add driver
cognaiger9 May 24, 2024
4524b09
complete driver for l1loss
cognaiger9 May 27, 2024
8b2f2f5
fix bug related to bfp16 data type in gtest
cognaiger9 May 27, 2024
f2c0750
add filter for forward case
cognaiger9 May 27, 2024
5bc0dfb
add only l1loss forward reduced
cognaiger9 May 30, 2024
3609cb0
remove redundant part
cognaiger9 Jun 4, 2024
8851320
merge rocm develop
cognaiger9 Jul 30, 2024
cdf3853
update benchmark method
cognaiger9 Aug 1, 2024
eeb971d
commit change
cognaiger9 Aug 5, 2024
eb48e6f
Merge branch 'develop' of https://github.com/ROCm/MIOpen into impl_l1…
cognaiger9 Nov 20, 2024
5fa31e6
change reduction procedure, still get inf result
cognaiger9 Nov 22, 2024
6e05582
fix gtest and driver
cognaiger9 Nov 22, 2024
ce8be4f
Merge branch 'develop' of https://github.com/ROCm/MIOpen into impl_l1…
cognaiger9 Nov 22, 2024
675fe1c
Merge branch 'develop' of https://github.com/ROCm/MIOpen into impl_l1…
cognaiger9 Nov 25, 2024
bd01023
Merge branch 'develop' into impl_l1loss_merged
long10024070 Dec 3, 2024
9a77f67
Merge branch 'develop' of https://github.com/ROCm/MIOpen into impl_l1…
cognaiger9 Dec 12, 2024
aa1e986
Merge branch 'develop' into impl_l1loss_merged
cognaiger9 Dec 12, 2024
e113fc8
Merge branch 'develop' into impl_l1loss_merged
long10024070 Dec 23, 2024
13f8e0b
Merge branch 'develop' into impl_l1loss_merged
long10024070 Jan 14, 2025
c2125cd
Merge branch 'develop' into impl_l1loss_merged
long10024070 Jan 14, 2025
7e1f6fa
Merge branch 'develop' of https://github.com/ROCm/MIOpen into impl_l1…
cognaiger9 Feb 13, 2025
be81398
Merge branch 'impl_l1loss_merged' of https://github.com/ROCm/MIOpen i…
cognaiger9 Feb 13, 2025
522026f
Merge branch 'develop' of https://github.com/ROCm/MIOpen into impl_l1…
cognaiger9 Feb 17, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
fix gtest and driver
cognaiger9 committed Nov 22, 2024

Verified

This commit was signed with the committer’s verified signature.
snyk-bot Snyk bot
commit 6e055828fe70227114069d8c56a3d6673807d21c
25 changes: 8 additions & 17 deletions driver/l1loss_driver.hpp
Original file line number Diff line number Diff line change
@@ -45,26 +45,19 @@ template <typename Tgpu, typename Tcheck>
int mloL1LossReducedForwardRunHost(const miopenTensorDescriptor_t iDesc,
const Tgpu* input,
const Tgpu* target,
Tcheck* workspacehost,
Tcheck* outputhost,
miopenLossReductionMode_t reduction)
{
auto size = miopen::deref(iDesc).GetElementSize();
size_t divisor = (reduction == MIOPEN_LOSS_REDUCTION_MEAN) ? size : 1;

// Phase 1: Calc loss for each element
double output = 0.0;
for(size_t i = 0; i < size; i++)
{
workspacehost[i] = abs(input[i] - target[i]) / divisor;
float diff = abs(static_cast<float>(input[i]) - static_cast<float>(target[i]));
output += diff;
}

// Phase 2: Reduce
float output = 0.0;
for(size_t i = 0; i < size; i++)
{
output += workspacehost[i];
}
outputhost[0] = output;
outputhost[0] = output / divisor;

return 0;
}
@@ -128,7 +121,6 @@ class L1LossDriver : public Driver
std::vector<Tgpu> workspace;

std::vector<Tref> outhost;
std::vector<Tref> workspacehost;

size_t ws_sizeInBytes;
miopenLossReductionMode_t reduction;
@@ -199,9 +191,9 @@ int L1LossDriver<Tgpu, Tref>::AddCmdLineArgs()
"int");
inflags.AddInputFlag("reduction",
'R',
"0",
"2",
"Reduction mode ('none'(0) | 'sum'(1) |'mean'(2)) "
"(Default=0)",
"(Default=2)",
"int");
inflags.AddInputFlag("iter", 'i', "10", "Number of Iterations (Default=10)", "int");
inflags.AddInputFlag("verify", 'V', "1", "Verify Each Layer (Default=1)", "int");
@@ -239,8 +231,7 @@ int L1LossDriver<Tgpu, Tref>::AllocateBuffersAndCopy()
out = std::vector<Tgpu>(out_sz, static_cast<Tgpu>(0));
workspace = std::vector<Tgpu>(ws_sz, static_cast<Tgpu>(0));

outhost = std::vector<Tref>(out_sz, static_cast<Tref>(0));
workspacehost = std::vector<Tref>(ws_sz, static_cast<Tref>(0));
outhost = std::vector<Tref>(out_sz, static_cast<Tref>(0));

for(int i = 0; i < in_sz; i++)
{
@@ -318,7 +309,7 @@ int L1LossDriver<Tgpu, Tref>::RunForwardCPU()
if(reduction == MIOPEN_LOSS_REDUCTION_MEAN || reduction == MIOPEN_LOSS_REDUCTION_SUM)
{
mloL1LossReducedForwardRunHost<Tgpu, Tref>(
inputDesc, in.data(), tar.data(), workspacehost.data(), outhost.data(), reduction);
inputDesc, in.data(), tar.data(), outhost.data(), reduction);
}

return miopenStatusSuccess;
20 changes: 5 additions & 15 deletions test/gtest/l1loss.hpp
Original file line number Diff line number Diff line change
@@ -82,10 +82,10 @@ inline std::vector<L1LossTestCase> GenFullTestCases()
{{1, 1, 1, 1, 1}, MIOPEN_LOSS_REDUCTION_SUM, false},
{{1, 2, 3, 4, 1}, MIOPEN_LOSS_REDUCTION_SUM, false},
{{1, 1, 1, 257, 1}, MIOPEN_LOSS_REDUCTION_SUM, false},
{{2, 10, 128, 128, 1}, MIOPEN_LOSS_REDUCTION_SUM, false},
{{2, 10, 128, 64, 1}, MIOPEN_LOSS_REDUCTION_MEAN, false},
{{5, 13, 17, 11, 1}, MIOPEN_LOSS_REDUCTION_MEAN, false},
{{256, 4, 8723, 1, 1}, MIOPEN_LOSS_REDUCTION_SUM, false},
{{256, 4, 8723, 1, 1}, MIOPEN_LOSS_REDUCTION_SUM, true},
{{256, 4, 128, 1, 1}, MIOPEN_LOSS_REDUCTION_MEAN, false},
{{256, 4, 128, 1, 1}, MIOPEN_LOSS_REDUCTION_MEAN, true},
{{1, 1, 1, 1, 1}, MIOPEN_LOSS_REDUCTION_SUM, true},
{{34, 4, 5, 1, 1}, MIOPEN_LOSS_REDUCTION_SUM, true},
{{4, 7, 5, 1, 1}, MIOPEN_LOSS_REDUCTION_SUM, true},
@@ -174,24 +174,14 @@ struct L1LossFwdTest : public ::testing::TestWithParam<L1LossTestCase>

double GetTolerance()
{
// The computation error of fp16 is ~2^13 (= 8192) times larger than
// that of fp32 because its mantissa is shorter by 13 bits.
double tolerance = std::is_same<T, float>::value ? 1.5e-6 : 8.2e-3;

// The bf16 mantissa has 7 bits, which is 3 bits shorter than fp16's.
if(std::is_same<T, bfloat16>::value)
tolerance *= 8.0;

double tolerance = std::numeric_limits<T>::epsilon() * 10;
return tolerance;
}

void Verify()
{
double threshold = GetTolerance();

auto error = miopen::rms_range(ref_output, output);

std::cout << "cpu output: " << ref_output[0] << "gpu output" << output[0] << std::endl;
auto error = miopen::rms_range(ref_output, output);

EXPECT_TRUE(error < threshold * 10) << "Error output beyond tolerance Error: " << error
<< ", Tolerance: " << threshold * 10;