diff --git a/src/CodeGen_Xtensa_vectors.template.cpp b/src/CodeGen_Xtensa_vectors.template.cpp index 93db72504887..89e13da46ad8 100644 --- a/src/CodeGen_Xtensa_vectors.template.cpp +++ b/src/CodeGen_Xtensa_vectors.template.cpp @@ -2849,6 +2849,24 @@ HALIDE_ALWAYS_INLINE HALIDE_MAYBE_UNUSED native_vector_i16 gather_load(offset) << 1)); } +template<> +HALIDE_ALWAYS_INLINE HALIDE_MAYBE_UNUSED native_vector_i16_x2 gather_load(const void *base, const native_vector_i32_x4 &offset) { + // NOTE(aelphy): the shift is needed because offsets are expected to be in bytes; + // the i32 offsets are narrowed to u16 lanes below, so each byte offset must fit + // in 16 bits after the <<1 element-to-byte scaling — TODO(review): confirm callers + // guarantee this, as the sibling i16 gather_load uses the same narrowing pattern. + native_vector_u16 offset0 = convert( + native_vector_i32_x2(native_vector_i32_x2::from_native_vector, + offset.native_vector[0], offset.native_vector[1])); + native_vector_u16 offset1 = convert( + native_vector_i32_x2(native_vector_i32_x2::from_native_vector, + offset.native_vector[2], offset.native_vector[3])); + + // Issue the two hardware gathers (IVP_GATHERANX16), then collect each result + // (IVP_GATHERDNX16) into the two native halves of the i16_x2 return vector. + auto gsr0 = IVP_GATHERANX16((const int16_t *)base, offset0 << 1); + auto gsr1 = IVP_GATHERANX16((const int16_t *)base, offset1 << 1); + + return native_vector_i16_x2(native_vector_i16_x2::from_native_vector, + IVP_GATHERDNX16(gsr0), + IVP_GATHERDNX16(gsr1)); +} + template<> HALIDE_ALWAYS_INLINE HALIDE_MAYBE_UNUSED native_vector_u16 gather_load(const void *base, const native_vector_i32_x2 &offset) { // NOTE(aelphy): the shift is needed because offsets are expected to be in bytes