Skip to content

Commit

Permalink
test: fix RoPETest.
Browse files · Browse the repository at this point in the history
fix: optimize cos_value.

Signed-off-by: Lee <[email protected]>
  • Loading branch information
lx200916 committed Oct 19, 2023
1 parent e0404e7 commit 65ace63
Show file tree
Hide file tree
Showing 4 changed files with 22 additions and 15 deletions.
12 changes: 10 additions & 2 deletions src/backends/cpu/CPURoPE.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,16 @@ void sinusoidal_position_embedding(int batch_size, int nums_head, int seq_len, i
for (int n = 0; n < batch_size; ++n) {
for (int h = 0; h < nums_head; ++h) {
for (int s = 0; s < seq_len; ++s) {
for (int d = 0; d < output_dim; ++d) {
for (int d = 0; d < output_dim; d += 2) {
int i = (int)d / 2;
float sin_value = std::sin(s / std::pow(10000, 2.0 * i / output_dim));
float cos_value = std::cos(s / std::pow(10000, 2.0 * i / output_dim));
sin.setDataAt<float>(n, h, s, d, sin_value);
cos.setDataAt<float>(n, h, s, d, cos_value);
if (d + 1 < output_dim) {
sin.setDataAt<float>(n, h, s, d + 1, sin_value);
cos.setDataAt<float>(n, h, s, d + 1, cos_value);
}
}
}
}
Expand All @@ -31,7 +35,7 @@ void sinusoidal_position_embedding_hf(int batch_size, int nums_head, int seq_len
for (int n = 0; n < batch_size; ++n) {
for (int h = 0; h < nums_head; ++h) {
for (int s = 0; s < seq_len; ++s) {
for (int d = 0; d < output_dim; ++d) {
for (int d = 0; d < output_dim; d += 2) {
int i = (int)d;
if (d >= (int)output_dim / 2) {
i = (int)(d - output_dim / 2);
Expand All @@ -40,6 +44,10 @@ void sinusoidal_position_embedding_hf(int batch_size, int nums_head, int seq_len
float cos_value = std::cos(s / std::pow(10000, 2.0 * i / output_dim));
sin.setDataAt<float>(n, h, s, d, sin_value);
cos.setDataAt<float>(n, h, s, d, cos_value);
if (d + 1 < output_dim) {
sin.setDataAt<float>(n, h, s, d + 1, sin_value);
cos.setDataAt<float>(n, h, s, d + 1, cos_value);
}
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion test/cpu/CPURoPETest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,5 @@ TEST_F(CPUTest, CPURoPE1) {
TEST_SETUP({input0}, {c_output});
TEST_EXCUTE({input0}, {c_output});
PRINT_TENSOR_SHAPES(input0, c_output, output);
COMPARE_TENSOR(output, c_output);
COMPARE_TENSOR(output, c_output, true);
}
20 changes: 9 additions & 11 deletions test/cpu/CPURoPETest.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,6 @@ def precompute_freqs_cis(dim: int, end: int, theta: float = 10000.0):
Returns:
torch.Tensor: Precomputed frequency tensor with complex exponentials.
"""
freqs = 1.0 / (theta ** (torch.arange(0, dim, 2)[: (dim // 2)].float() / dim))
t = torch.arange(end, device=freqs.device) # type: ignore
Expand Down Expand Up @@ -80,15 +77,15 @@ def apply_rotary_emb(
Tuple[torch.Tensor, torch.Tensor]: Tuple of modified query tensor and key tensor with rotary embeddings.
"""
xq_ = torch.view_as_complex(xq.float().reshape(*xq.shape[:-1], -1, 2))
xk_ = torch.view_as_complex(xk.float().reshape(*xk.shape[:-1], -1, 2))
# xk_ = torch.view_as_complex(xk.float().reshape(*xk.shape[:-1], -1, 2))
print(xq_.shape)
freqs_cis = reshape_for_broadcast(freqs_cis, xq_)
print(freqs_cis.shape)
xq_out = torch.view_as_real(xq_ * freqs_cis).flatten(3)
xk_out = torch.view_as_real(xk_ * freqs_cis).flatten(3)
print(xk_out.shape)
# xk_out = torch.view_as_real(xk_ * freqs_cis).flatten(3)
# print(xk_out.shape)

return xq_out.type_as(xq), xk_out.type_as(xk)
return xq_out.type_as(xq)


class RoPE(torch.nn.Module):
Expand All @@ -98,10 +95,9 @@ def __init__(self, ):
def forward(self, out):
seq_len = out.shape[1]
dim = out.shape[-1]
start_pos = 0
freqs_cis = precompute_freqs_cis(dim, seq_len * 2)
freqs_cis = freqs_cis[start_pos: start_pos + seq_len]
out, _ = apply_rotary_emb(out, out, freqs_cis)
freqs_cis = precompute_freqs_cis(dim, seq_len)
# freqs_cis = freqs_cis[start_pos: start_pos + seq_len]
out = apply_rotary_emb(out, out, freqs_cis)
return out


Expand All @@ -111,6 +107,8 @@ def test(self):
model = RoPE()
output = model(input0)
print(output.shape)
input0 = input0.transpose(1, 2)
output = output.transpose(1, 2)
self.test_done(True)


Expand Down
3 changes: 2 additions & 1 deletion test/cpu/CPUTest.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,9 +66,10 @@ static bool isSame(Tensor *a, Tensor *b, bool unstrict = false) {
double a_ = a->dataAt<float>({i, j, k, l});
double b_ = b->dataAt<float>({i, j, k, l});
// if ((a_ < b_) || (a_ > b_)) {
if (abs(a_ - b_) / std::max(a_, b_) > eps) {
if ((abs(a_ - b_) / std::max(a_, b_)) > eps) {
std::cout << std::setprecision(8) << setiosflags(std::ios::fixed | std::ios::showpoint) << "a[" << i << "," << j << "," << k << "," << l << "]: " << (double)a->dataAt<float>(i, j, k, l) << "!= b[" << i << "," << j << "," << k << "," << l << "]: " << (double)b->dataAt<float>(i, j, k, l) << std::endl;
// return false;
std::cout << std::setprecision(8) << setiosflags(std::ios::fixed | std::ios::showpoint) << "Diff:" << abs(a_ - b_) / std::max(a_, b_) << std::endl;
flag += 1;
if (flag > 10) {
return false;
Expand Down

0 comments on commit 65ace63

Please sign in to comment.