Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: improved quantization error #368

Open
wants to merge 1 commit into
base: fix/improve-quantization-error
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
265 changes: 265 additions & 0 deletions includes/acl/compression/impl/normalize_streams.h

Large diffs are not rendered by default.

43 changes: 43 additions & 0 deletions includes/acl/compression/impl/normalize_track_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,24 +42,65 @@ namespace acl
{
using namespace rtm;

#ifdef ACL_PRECISION_BOOST

const vector4f half = rtm::vector_set(0.5F);
const vector4f half_neg = rtm::vector_set(-0.5F);

#else

const vector4f one = rtm::vector_set(1.0F);
const vector4f zero = vector_zero();

#endif

track_vector4f& typed_track = track_cast<track_vector4f>(mut_track);

const uint32_t num_samples = mut_track.get_num_samples();

#ifdef ACL_PRECISION_BOOST

const vector4f range_center = range.get_center();

#else
const vector4f range_min = range.get_min();

#endif

const vector4f range_extent = range.get_extent();
const mask4f is_range_zero_mask = vector_less_than(range_extent, rtm::vector_set(0.000000001F));

for (uint32_t sample_index = 0; sample_index < num_samples; ++sample_index)
{

#ifdef ACL_PRECISION_BOOST

// normalized value is between [-0.5 .. 0.5]
// value = (normalized value * range extent) + range center
// normalized value = (value - range center) / range extent

#else

// normalized value is between [0.0 .. 1.0]
// value = (normalized value * range extent) + range min
// normalized value = (value - range min) / range extent

#endif

const vector4f sample = typed_track[sample_index];

#ifdef ACL_PRECISION_BOOST

vector4f normalized_sample = vector_div(vector_sub(sample, range_center), range_extent);

// Clamp because the division might be imprecise
normalized_sample = rtm::vector_clamp(normalized_sample, half_neg, half);
normalized_sample = rtm::vector_select(is_range_zero_mask, half_neg, normalized_sample);

ACL_ASSERT(vector_all_greater_equal(normalized_sample, half_neg) && vector_all_less_equal(normalized_sample, half), "Invalid normalized value. -0.5 <= [%f, %f, %f, %f] <= 0.5", (float)vector_get_x(normalized_sample), (float)vector_get_y(normalized_sample), (float)vector_get_z(normalized_sample), (float)vector_get_w(normalized_sample));

#else

vector4f normalized_sample = vector_div(vector_sub(sample, range_min), range_extent);

// Clamp because the division might be imprecise
Expand All @@ -68,6 +109,8 @@ namespace acl

ACL_ASSERT(vector_all_greater_equal(normalized_sample, zero) && vector_all_less_equal(normalized_sample, one), "Invalid normalized value. 0.0 <= [%f, %f, %f, %f] <= 1.0", (float)vector_get_x(normalized_sample), (float)vector_get_y(normalized_sample), (float)vector_get_z(normalized_sample), (float)vector_get_w(normalized_sample));

#endif

typed_track[sample_index] = normalized_sample;
}
}
Expand Down
60 changes: 60 additions & 0 deletions includes/acl/compression/impl/quantize_streams.h
Original file line number Diff line number Diff line change
Expand Up @@ -353,7 +353,17 @@ namespace acl
const rtm::vector4f normalized_rotation = normalize_sample(rotation, clip_range);

uint8_t* quantized_ptr = quantized_stream.get_raw_sample_ptr(0);

#ifdef ACL_PRECISION_BOOST

pack_vector3_sn48_unsafe_precise_endpoints(normalized_rotation, quantized_ptr);

#else

pack_vector3_u48_unsafe(normalized_rotation, quantized_ptr);

#endif

}
else
{
Expand All @@ -372,7 +382,17 @@ namespace acl
else
{
const rtm::quatf rotation = raw_segment_stream.get_raw_sample<rtm::quatf>(sample_index);

#ifdef ACL_PRECISION_BOOST

pack_vector3_snXX_unsafe(rtm::quat_to_vector(rotation), num_bits_at_bit_rate, quantized_ptr);

#else

pack_vector3_uXX_unsafe(rtm::quat_to_vector(rotation), num_bits_at_bit_rate, quantized_ptr);

#endif

}
}
}
Expand Down Expand Up @@ -465,7 +485,17 @@ namespace acl
const rtm::vector4f normalized_translation = normalize_sample(translation, clip_range);

uint8_t* quantized_ptr = quantized_stream.get_raw_sample_ptr(0);

#ifdef ACL_PRECISION_BOOST

pack_vector3_sn48_unsafe_precise_endpoints(normalized_translation, quantized_ptr);

#else

pack_vector3_u48_unsafe(normalized_translation, quantized_ptr);

#endif

}
else
{
Expand All @@ -483,7 +513,17 @@ namespace acl
else
{
const rtm::vector4f translation = raw_segment_stream.get_raw_sample<rtm::vector4f>(sample_index);

#ifdef ACL_PRECISION_BOOST

pack_vector3_snXX_unsafe(translation, num_bits_at_bit_rate, quantized_ptr);

#else

pack_vector3_uXX_unsafe(translation, num_bits_at_bit_rate, quantized_ptr);

#endif

}
}
}
Expand Down Expand Up @@ -575,7 +615,17 @@ namespace acl
const rtm::vector4f normalized_scale = normalize_sample(scale, clip_range);

uint8_t* quantized_ptr = quantized_stream.get_raw_sample_ptr(0);

#ifdef ACL_PRECISION_BOOST

pack_vector3_sn48_unsafe_precise_endpoints(normalized_scale, quantized_ptr);

#else

pack_vector3_u48_unsafe(normalized_scale, quantized_ptr);

#endif

}
else
{
Expand All @@ -593,7 +643,17 @@ namespace acl
else
{
const rtm::vector4f scale = raw_segment_stream.get_raw_sample<rtm::vector4f>(sample_index);

#ifdef ACL_PRECISION_BOOST

pack_vector3_snXX_unsafe(scale, num_bits_at_bit_rate, quantized_ptr);

#else

pack_vector3_uXX_unsafe(scale, num_bits_at_bit_rate, quantized_ptr);

#endif

}
}
}
Expand Down
88 changes: 88 additions & 0 deletions includes/acl/compression/impl/quantize_track_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,18 +45,57 @@ namespace acl
rtm::vector4f max_value;
rtm::vector4f inv_max_value;

#ifdef ACL_PRECISION_BOOST

rtm::vector4f limit;
rtm::vector4f mid_compress;
rtm::vector4f mid_decompress;

#endif

explicit quantization_scales(uint32_t num_bits)
{
ACL_ASSERT(num_bits > 0, "Cannot decay with 0 bits");

#ifdef ACL_PRECISION_BOOST

ACL_ASSERT(num_bits < 25, "Attempting to decay on too many bits");

const float max_value_ = rtm::scalar_safe_to_float(1 << num_bits);
limit = rtm::vector_set(max_value_ - 1.0F);
mid_compress = rtm::vector_set(0.5F * max_value_);
mid_decompress = rtm::vector_set((0.5F * max_value_) - 0.5F);
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note: for 8 bits, limit = 255, mid_compress = 128, mid_decompress = 127.5


#else

ACL_ASSERT(num_bits < 31, "Attempting to decay on too many bits");

const float max_value_ = rtm::scalar_safe_to_float((1 << num_bits) - 1);

#endif

max_value = rtm::vector_set(max_value_);
inv_max_value = rtm::vector_set(1.0F / max_value_);
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

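Note: for 8 bits with ACL_PRECISION_BOOST defined, max_value = 256 and inv_max_value = 1/256 (without it, max_value = (1 << 8) - 1 = 255 and inv_max_value = 1/255)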

}
};

// Decays the input value through quantization by packing and unpacking a normalized input value

#ifdef ACL_PRECISION_BOOST

inline rtm::vector4f RTM_SIMD_CALL decay_vector4_snXX(rtm::vector4f_arg0 value, const quantization_scales& scales)
{
using namespace rtm;

ACL_ASSERT(vector_all_greater_equal(value, rtm::vector_set(-0.5F)) && vector_all_less_equal(value, rtm::vector_set(0.5F)), "Expected normalized signed input value: %f, %f, %f, %f", (float)vector_get_x(value), (float)vector_get_y(value), (float)vector_get_z(value), (float)vector_get_w(value));

const vector4f packed_value = vector_min(vector_add(vector_floor(vector_mul(value, scales.max_value)), scales.mid_compress), scales.limit);
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note: If our input value is -0.5 and we use 8 bits, we have:
packed_value = min(floor(-0.5 * 256) + 128, 255)
packed_value = min(floor(-128) + 128, 255)
packed_value = min(0, 255)

If our input is 0.5, we have:
packed_value = min(floor(0.5 * 256) + 128, 255)
packed_value = min(floor(128) + 128, 255)
packed_value = min(256, 255)

If our input is in [127/256, 0.5] (roughly [0.4961, 0.5]), we have, e.g. for 0.4961:
packed_value = min(floor(0.4961 * 256) + 128, 255)
packed_value = min(floor(127.0016) + 128, 255)
packed_value = min(255, 255)

It works similarly at the low end: every input in [-0.5, -127/256) (roughly [-0.5, -0.4961)) floors to -128 and packs to 0

const vector4f decayed_value = vector_mul(vector_sub(packed_value, scales.mid_decompress), scales.inv_max_value);
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note: for packed_value = 0, we have:
decayed_value = (0 - 127.5) / 256 = -0.498

For packed_value = 255:
decayed_value = (255 - 127.5) / 256 = 0.498

return decayed_value;
}

#else

inline rtm::vector4f RTM_SIMD_CALL decay_vector4_uXX(rtm::vector4f_arg0 value, const quantization_scales& scales)
{
using namespace rtm;
Expand All @@ -67,8 +106,23 @@ namespace acl
const vector4f decayed_value = vector_mul(packed_value, scales.inv_max_value);
return decayed_value;
}

#endif

// Packs a normalized input value through quantization

#ifdef ACL_PRECISION_BOOST

// Quantizes a signed normalized value into its packed integer bucket, returned as floats.
// 'value' is expected to lie within [-0.5, 0.5] on every lane (asserted below).
// Each lane is scaled by 'scales.max_value', floored, offset by 'scales.mid_compress'
// so the result is non-negative, then capped at 'scales.limit' because an input of
// exactly 0.5 would otherwise land one past the last bucket.
inline rtm::vector4f RTM_SIMD_CALL pack_vector4_snXX(rtm::vector4f_arg0 value, const quantization_scales& scales)
{
	using namespace rtm;

	ACL_ASSERT(vector_all_greater_equal(value, rtm::vector_set(-0.5F)) && vector_all_less_equal(value, rtm::vector_set(0.5F)), "Expected normalized signed input value: %f, %f, %f, %f", (float)vector_get_x(value), (float)vector_get_y(value), (float)vector_get_z(value), (float)vector_get_w(value));

	// Scale the normalized input up to the quantized range and snap down to an integer
	const vector4f scaled_value = vector_mul(value, scales.max_value);
	const vector4f floored_value = vector_floor(scaled_value);

	// Shift from a signed range centered on zero to an unsigned bucket index
	const vector4f bucket_value = vector_add(floored_value, scales.mid_compress);

	// Cap at the highest representable bucket
	return vector_min(bucket_value, scales.limit);
}

#else

inline rtm::vector4f RTM_SIMD_CALL pack_vector4_uXX(rtm::vector4f_arg0 value, const quantization_scales& scales)
{
using namespace rtm;
Expand All @@ -78,6 +132,8 @@ namespace acl
return vector_round_symmetric(vector_mul(value, scales.max_value));
}

#endif

inline void quantize_scalarf_track(track_list_context& context, uint32_t track_index)
{
using namespace rtm;
Expand All @@ -90,7 +146,17 @@ namespace acl
const uint32_t num_samples = mut_track.get_num_samples();

const scalarf_range& range = context.range_list[track_index].range.scalarf;

#ifdef ACL_PRECISION_BOOST

const vector4f range_center = range.get_center();

#else

const vector4f range_min = range.get_min();

#endif

const vector4f range_extent = range.get_extent();

const vector4f zero = vector_zero();
Expand All @@ -113,13 +179,25 @@ namespace acl
std::memcpy(&raw_sample, ref_track[sample_index], ref_element_size);

const vector4f normalized_sample = mut_track[sample_index];

#ifdef ACL_PRECISION_BOOST

// Decay our value through quantization
const vector4f decayed_normalized_sample = decay_vector4_snXX(normalized_sample, scales);

// Undo normalization
const vector4f decayed_sample = vector_mul_add(decayed_normalized_sample, range_extent, range_center);

#else

// Decay our value through quantization
const vector4f decayed_normalized_sample = decay_vector4_uXX(normalized_sample, scales);

// Undo normalization
const vector4f decayed_sample = vector_mul_add(decayed_normalized_sample, range_extent, range_min);

#endif

const vector4f delta = vector_abs(vector_sub(raw_sample, decayed_sample));
const vector4f masked_delta = vector_select(sample_mask, delta, zero);
if (!vector_all_less_equal(masked_delta, precision))
Expand Down Expand Up @@ -152,7 +230,17 @@ namespace acl
const quantization_scales scales(num_bits_at_bit_rate);

for (uint32_t sample_index = 0; sample_index < num_samples; ++sample_index)

#ifdef ACL_PRECISION_BOOST

mut_track[sample_index] = pack_vector4_snXX(mut_track[sample_index], scales);

#else

mut_track[sample_index] = pack_vector4_uXX(mut_track[sample_index], scales);

#endif

}
}

Expand Down
Loading