Skip to content

Commit

Permalink
[opus] Update opus to 1.5.2
Browse files Browse the repository at this point in the history
  • Loading branch information
starg2 committed Apr 18, 2024
1 parent dce0bfa commit ca98f20
Show file tree
Hide file tree
Showing 6 changed files with 77 additions and 15 deletions.
52 changes: 50 additions & 2 deletions opus/celt/arm/armcpu.c
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ static OPUS_INLINE opus_uint32 opus_cpu_capabilities(void){
/* Linux based */
#include <stdio.h>

opus_uint32 opus_cpu_capabilities(void)
static opus_uint32 opus_cpu_capabilities(void)
{
opus_uint32 flags = 0;
FILE *cpuinfo;
Expand Down Expand Up @@ -169,7 +169,7 @@ opus_uint32 opus_cpu_capabilities(void)
#include <sys/types.h>
#include <sys/sysctl.h>

opus_uint32 opus_cpu_capabilities(void)
static opus_uint32 opus_cpu_capabilities(void)
{
opus_uint32 flags = 0;

Expand All @@ -191,6 +191,54 @@ opus_uint32 opus_cpu_capabilities(void)
return flags;
}

#elif defined(__FreeBSD__)
#include <sys/auxv.h>

static opus_uint32 opus_cpu_capabilities(void)
{
long hwcap = 0;
opus_uint32 flags = 0;

# if defined(OPUS_ARM_MAY_HAVE_MEDIA) \
|| defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
/* FreeBSD requires armv6+, which always supports media instructions */
flags |= OPUS_CPU_ARM_MEDIA_FLAG;
# endif

elf_aux_info(AT_HWCAP, &hwcap, sizeof hwcap);

# if defined(OPUS_ARM_MAY_HAVE_EDSP) || defined(OPUS_ARM_MAY_HAVE_MEDIA) \
|| defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
# ifdef HWCAP_EDSP
if (hwcap & HWCAP_EDSP)
flags |= OPUS_CPU_ARM_EDSP_FLAG;
# endif

# if defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
# ifdef HWCAP_NEON
if (hwcap & HWCAP_NEON)
flags |= OPUS_CPU_ARM_NEON_FLAG;
# elif defined(HWCAP_ASIMD)
if (hwcap & HWCAP_ASIMD)
flags |= OPUS_CPU_ARM_NEON_FLAG | OPUS_CPU_ARM_MEDIA_FLAG | OPUS_CPU_ARM_EDSP_FLAG;
# endif
# endif
# if defined(OPUS_ARM_MAY_HAVE_DOTPROD) && defined(HWCAP_ASIMDDP)
if (hwcap & HWCAP_ASIMDDP)
flags |= OPUS_CPU_ARM_DOTPROD_FLAG;
# endif
# endif

#if defined(OPUS_ARM_PRESUME_AARCH64_NEON_INTR)
flags |= OPUS_CPU_ARM_EDSP_FLAG | OPUS_CPU_ARM_MEDIA_FLAG | OPUS_CPU_ARM_NEON_FLAG;
# if defined(OPUS_ARM_PRESUME_DOTPROD)
flags |= OPUS_CPU_ARM_DOTPROD_FLAG;
# endif
#endif

return (flags);
}

#else
/* The feature registers which can tell us what the processor supports are
* accessible in priveleged modes only, so we can't have a general user-space
Expand Down
2 changes: 1 addition & 1 deletion opus/celt/x86/x86cpu.c
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
(defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)) || \
(defined(OPUS_X86_MAY_HAVE_AVX2) && !defined(OPUS_X86_PRESUME_AVX2)))

#if defined(_WIN32)
#if defined(_MSC_VER)

#include <intrin.h>
static _inline void cpuid(unsigned int CPUInfo[4], unsigned int InfoType)
Expand Down
18 changes: 16 additions & 2 deletions opus/celt/x86/x86cpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,22 @@ int opus_select_arch(void);
Use this to work around those restrictions (which should hopefully all get
optimized to a single MOVD instruction).
GCC implemented _mm_loadu_si32() since GCC 11; HOWEVER, there is a bug!
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99754 */
# if !defined(_MSC_VER) && !OPUS_GNUC_PREREQ(11,3) && !(defined(__clang__) && (__clang_major__ >= 8))
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99754
LLVM implemented _mm_loadu_si32() since Clang 8.0, however the
__clang_major__ version number macro is unreliable, as vendors
(specifically, Apple) will use different numbering schemes than upstream.
Clang's advice is "use feature detection", but they do not provide feature
detection support for specific SIMD functions.
We follow the approach from the SIMDe project and instead detect unrelated
features that should be available in the version we want (see
<https://github.com/simd-everywhere/simde/blob/master/simde/simde-detect-clang.h>).*/
# if defined(__clang__)
# if __has_warning("-Wextra-semi-stmt") || \
__has_builtin(__builtin_rotateleft32)
# define OPUS_CLANG_8 (1)
# endif
# endif
# if !defined(_MSC_VER) && !OPUS_GNUC_PREREQ(11,3) && !defined(OPUS_CLANG_8)
# include <string.h>
# include <emmintrin.h>

Expand Down
15 changes: 7 additions & 8 deletions opus/silk/x86/NSQ_del_dec_avx2.c
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,6 @@ static OPUS_INLINE int verify_assumptions(const silk_encoder_state *psEncC)
/* Intrinsics not defined on MSVC */
#ifdef _MSC_VER
#include <Intsafe.h>
#define __m128i_u __m128i
static inline int __builtin_sadd_overflow(opus_int32 a, opus_int32 b, opus_int32* res)
{
*res = a+b;
Expand Down Expand Up @@ -959,7 +958,7 @@ static OPUS_INLINE void silk_nsq_del_dec_scale_states_avx2(
{
__m256i x = _mm256_cvtepi16_epi64(_mm_loadu_si64(&x16[i]));
x = _mm256_slli_epi64(_mm256_mul_epi32(x, _mm256_set1_epi32(inv_gain_Q26)), 16);
_mm_storeu_si128((__m128i_u*)&x_sc_Q10[i], silk_cvtepi64_epi32_high(x));
_mm_storeu_si128((__m128i*)&x_sc_Q10[i], silk_cvtepi64_epi32_high(x));
}

/* After rewhitening the LTP state is un-scaled, so scale with inv_gain_Q16 */
Expand All @@ -985,8 +984,8 @@ static OPUS_INLINE void silk_nsq_del_dec_scale_states_avx2(
/* Scale long-term shaping state */
for (i = NSQ->sLTP_shp_buf_idx - psEncC->ltp_mem_length; i < NSQ->sLTP_shp_buf_idx; i+=4)
{
__m128i_u* p = (__m128i_u*)&NSQ->sLTP_shp_Q14[i];
*p = silk_mm_smulww_epi32(*p, gain_adj_Q16);
opus_int32 *p = &NSQ->sLTP_shp_Q14[i];
_mm_storeu_si128((__m128i*)p, silk_mm_smulww_epi32(_mm_loadu_si128((__m128i*)p), gain_adj_Q16));
}

/* Scale long-term prediction state */
Expand Down Expand Up @@ -1041,13 +1040,13 @@ static OPUS_INLINE void silk_LPC_analysis_filter_avx2(
/* Allowing wrap around so that two wraps can cancel each other. The rare
cases where the result wraps around can only be triggered by invalid streams*/

__m256i in_v = _mm256_cvtepi16_epi32(_mm_loadu_si128((__m128i_u*)&in_ptr[-8]));
__m256i B_v = _mm256_cvtepi16_epi32(_mm_loadu_si128((__m128i_u*)& B[0]));
__m256i in_v = _mm256_cvtepi16_epi32(_mm_loadu_si128((__m128i*)&in_ptr[-8]));
__m256i B_v = _mm256_cvtepi16_epi32(_mm_loadu_si128((__m128i*)& B[0]));
__m256i sum = _mm256_mullo_epi32(in_v, silk_mm256_reverse_epi32(B_v));
if (order > 10)
{
in_v = _mm256_cvtepi16_epi32(_mm_loadu_si128((__m128i_u*)&in_ptr[-16]));
B_v = _mm256_cvtepi16_epi32(_mm_loadu_si128((__m128i_u*)&B [8]));
in_v = _mm256_cvtepi16_epi32(_mm_loadu_si128((__m128i*)&in_ptr[-16]));
B_v = _mm256_cvtepi16_epi32(_mm_loadu_si128((__m128i*)&B [8]));
B_v = silk_mm256_reverse_epi32(B_v);
}
else
Expand Down
2 changes: 1 addition & 1 deletion opus/src/opus_private.h
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,7 @@ int opus_multistream_decode_native(

opus_int32 opus_packet_extensions_parse(const unsigned char *data, opus_int32 len, opus_extension_data *extensions, opus_int32 *nb_extensions);

opus_int32 opus_packet_extensions_generate(unsigned char *data, opus_int32 len, const opus_extension_data *extensions, int nb_extensions, int pad);
opus_int32 opus_packet_extensions_generate(unsigned char *data, opus_int32 len, const opus_extension_data *extensions, opus_int32 nb_extensions, int pad);

opus_int32 opus_packet_extensions_count(const unsigned char *data, opus_int32 len);

Expand Down
3 changes: 2 additions & 1 deletion opus/src/repacketizer.c
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,8 @@ opus_int32 opus_repacketizer_out_range_impl(OpusRepacketizer *rp, int begin, int
/* incorporate any extensions from the repacketizer padding */
for (i=begin;i<end;i++)
{
int frame_ext_count, j;
int j;
opus_int32 frame_ext_count;
frame_ext_count = total_ext_count - ext_count;
int ret = opus_packet_extensions_parse(rp->paddings[i], rp->padding_len[i],
&all_extensions[ext_count], &frame_ext_count);
Expand Down

0 comments on commit ca98f20

Please sign in to comment.