Skip to content

Commit

Permalink
use better fix16 multiply..
Browse files Browse the repository at this point in the history
  • Loading branch information
sctanf committed Aug 4, 2024
1 parent cc5a5c3 commit d2a9b16
Show file tree
Hide file tree
Showing 2 changed files with 107 additions and 7 deletions.
47 changes: 40 additions & 7 deletions dsp/dsp.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
#ifndef SCTANFDSP_H
#define SCTANFDSP_H

#define USE_FIX16_MUL

#include <stdio.h>

#include "pico/stdlib.h"
Expand All @@ -24,12 +26,43 @@ typedef int32_t dspfx;
#define fxabs2(a) ((a)<0?-(a):(a))

#define fpformat3 28
#define mulfx0(a,b) ((int64_t)(a)*(int64_t)(b))
//#define mulfx0(a,b) ((int64_t)(a)*(int64_t)(b))
#define mulfx3(a,b) ((dspfx)(((int64_t)(a)*(int64_t)(b))>>fpformat3))
#define fxint3(a) ((a)>>fpformat3)
#define mulshift(a) ((dspfx)fxint3(a))
//#define mulshift(a) ((dspfx)fxint3(a))
#define floatfx3(a) ((int64_t)((a)*(1<<fpformat3)))

#ifdef USE_FIX16_MUL
/* Signed fixed-point value with 28 fractional bits held in 32 bits (1.0 == 1 << 28). */
typedef int32_t fix3_28_t;

// https://github.com/ploopyco/headphones/blob/master/firmware/code/fix16.inl
/**
 * Multiply two fix3_28_t values using only 32-bit multiplications.
 *
 * Each operand is split as hi * 2^14 + lo (0 <= lo < 2^14), so that
 *   a * b = hi_a*hi_b * 2^28 + (hi_a*lo_b + hi_b*lo_a) * 2^14 + lo_a*lo_b.
 * The returned value is the 2^28 part of that expansion.
 *
 * Unless HANDLE_CARRY is defined to a non-zero value, the contribution of
 * lo_a*lo_b is dropped, so the result can be one LSB below the exactly
 * floored product.
 *
 * The two cross products are reduced separately: their sum may need 33 bits,
 * and adding them first would wrap and corrupt results that are themselves
 * representable. This costs a few extra ALU operations per call.
 *
 * Relies on arithmetic right shift of negative values and on wrap-around when
 * converting uint32_t to int32_t (both implementation-defined in ISO C).
 *
 * @param inArg0 first factor
 * @param inArg1 second factor
 * @return inArg0 * inArg1 rescaled by 2^-28; meaningful only when the
 *         mathematical product fits in fix3_28_t.
 */
static inline fix3_28_t fix16_mul(fix3_28_t inArg0, fix3_28_t inArg1) {
    const int32_t a_hi = inArg0 >> 14, b_hi = inArg1 >> 14;
    const int32_t a_lo = inArg0 & 0x3FFF, b_lo = inArg1 & 0x3FFF;

    /* |hi| <= 2^17 and lo < 2^14, so each cross product fits in int32_t. */
    const int32_t ad = a_hi * b_lo;
    const int32_t cb = b_hi * a_lo;

    /* Sum of the two 14-bit remainders; bit 14 is the carry into the quotient. */
    const int32_t cross_rem = (ad & 0x3FFF) + (cb & 0x3FFF);

    /* Accumulate modulo 2^32 so that partial sums which temporarily leave the
     * int32_t range cannot trigger signed-overflow undefined behaviour. */
    uint32_t acc = (uint32_t)a_hi * (uint32_t)b_hi;
    acc += (uint32_t)(ad >> 14) + (uint32_t)(cb >> 14) + (uint32_t)(cross_rem >> 14);

#if defined(HANDLE_CARRY) && HANDLE_CARRY
    /* Low 28 bits of the full product; both terms are below 2^28, so bit 28
     * of their sum is exactly the carry into the returned part. */
    const uint32_t low28 = ((uint32_t)(cross_rem & 0x3FFF) << 14)
                         + (uint32_t)a_lo * (uint32_t)b_lo;
    acc += low28 >> 28;
#endif

    return (fix3_28_t)acc;
}

#define mulfx0(a,b) fix16_mul(a,b)
#define mulshift(a) (a)
#else
#define mulfx0(a,b) ((int64_t)(a)*(int64_t)(b))
#define mulshift(a) ((dspfx)fxint3(a))
#endif

typedef struct {
const dspfx k;
dspfx y1, z1, u1;
Expand Down Expand Up @@ -63,12 +96,12 @@ static inline void process_fwi(fwi *const filter, int16_t iters, int32_t *in, in

static inline void process_biquad(biquad *const filter, int64_t a0, int64_t a1, int64_t a2, int64_t b1, int64_t b2, int16_t iters, int32_t *in, int32_t *out) {
int16_t iters2 = iters * 2;
out[0] = mulshift(mulfx0(a0, in[0]) + mulfx0(a1, filter->a1z) + mulfx0(a2, filter->a2z) - mulfx0(b1, filter->b1z) - mulfx0(b2, filter->b2z));
out[2] = mulshift(mulfx0(a0, in[2]) + mulfx0(a1, in[0]) + mulfx0(a2, filter->a1z) - mulfx0(b1, out[0]) - mulfx0(b2, filter->b1z));
out[1] = mulshift(mulfx0(a0, in[1]) + mulfx0(a1, filter->a1zr) + mulfx0(a2, filter->a2zr) - mulfx0(b1, filter->b1zr) - mulfx0(b2, filter->b2zr));
out[3] = mulshift(mulfx0(a0, in[3]) + mulfx0(a1, in[1]) + mulfx0(a2, filter->a1zr) - mulfx0(b1, out[1]) - mulfx0(b2, filter->b1zr));
out[0] = mulshift(mulfx0(a0, in[0]) + mulfx0(a1, filter->a1z) - mulfx0(b1, filter->b1z) + mulfx0(a2, filter->a2z) - mulfx0(b2, filter->b2z));
out[2] = mulshift(mulfx0(a0, in[2]) + mulfx0(a1, in[0]) - mulfx0(b1, out[0]) + mulfx0(a2, filter->a1z) - mulfx0(b2, filter->b1z));
out[1] = mulshift(mulfx0(a0, in[1]) + mulfx0(a1, filter->a1zr) - mulfx0(b1, filter->b1zr) + mulfx0(a2, filter->a2zr) - mulfx0(b2, filter->b2zr));
out[3] = mulshift(mulfx0(a0, in[3]) + mulfx0(a1, in[1]) - mulfx0(b1, out[1]) + mulfx0(a2, filter->a1zr) - mulfx0(b2, filter->b1zr));
for (int i = 4; i < iters2; i++) {
out[i] = mulshift(mulfx0(a0, in[i]) + mulfx0(a1, in[i - 2]) + mulfx0(a2, in[i - 4]) - mulfx0(b1, out[i - 2]) - mulfx0(b2, out[i - 4])); // takes up the most time by far..
out[i] = mulshift(mulfx0(a0, in[i]) + mulfx0(a1, in[i - 2]) - mulfx0(b1, out[i - 2]) + mulfx0(a2, in[i - 4]) - mulfx0(b2, out[i - 4])); // takes up the most time by far..
}
filter->a2z = in[iters2 - 4];
filter->b2z = out[iters2 - 4];
Expand Down
67 changes: 67 additions & 0 deletions main.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,19 @@ biquad(eq_bq_5)
biquad(eq_bq_6)
biquad(eq_bq_7)
biquad(eq_bq_8)
/*
biquad(eq_bq_9)
biquad(eq_bq_10)
biquad(eq_bq_11)
biquad(eq_bq_12)
biquad(eq_bq_13)
biquad(eq_bq_14)
biquad(eq_bq_15)
biquad(eq_bq_16)
biquad(eq_bq_17)
biquad(eq_bq_18)
*/
// Twice as many filters can now run; the extra ones above are left disabled for now.

bufring_t bufring1 = {
.len = 0,
Expand Down Expand Up @@ -128,6 +141,48 @@ bufring_t bufring1 = {
#define EQ_I_7 1.0,0.0,0.0,0.0,0.0
#endif

/*
#ifndef EQ_I_8
#define EQ_I_8 1.0,0.0,0.0,0.0,0.0
#endif
#ifndef EQ_I_9
#define EQ_I_9 1.0,0.0,0.0,0.0,0.0
#endif
#ifndef EQ_I_10
#define EQ_I_10 1.0,0.0,0.0,0.0,0.0
#endif
#ifndef EQ_I_11
#define EQ_I_11 1.0,0.0,0.0,0.0,0.0
#endif
#ifndef EQ_I_12
#define EQ_I_12 1.0,0.0,0.0,0.0,0.0
#endif
#ifndef EQ_I_13
#define EQ_I_13 1.0,0.0,0.0,0.0,0.0
#endif
#ifndef EQ_I_14
#define EQ_I_14 1.0,0.0,0.0,0.0,0.0
#endif
#ifndef EQ_I_15
#define EQ_I_15 1.0,0.0,0.0,0.0,0.0
#endif
#ifndef EQ_I_16
#define EQ_I_16 1.0,0.0,0.0,0.0,0.0
#endif
#ifndef EQ_I_17
#define EQ_I_17 1.0,0.0,0.0,0.0,0.0
#endif
*/

int32_t actual_vol = 0;
#define VOL_STEP 600000

Expand Down Expand Up @@ -594,6 +649,18 @@ static void __not_in_flash_func(_as_audio_packet)(struct usb_endpoint *ep) { //
process_biquad(&eq_bq_6, biquadconstsfx(EQ_I_5), count, buf1, buf0);
process_biquad(&eq_bq_7, biquadconstsfx(EQ_I_6), count, buf0, buf1);
process_biquad(&eq_bq_8, biquadconstsfx(EQ_I_7), count, buf1, buf0);
/*
process_biquad(&eq_bq_9, biquadconstsfx(EQ_I_8), count, buf0, buf1);
process_biquad(&eq_bq_10, biquadconstsfx(EQ_I_9), count, buf1, buf0);
process_biquad(&eq_bq_11, biquadconstsfx(EQ_I_10), count, buf0, buf1);
process_biquad(&eq_bq_12, biquadconstsfx(EQ_I_11), count, buf1, buf0);
process_biquad(&eq_bq_13, biquadconstsfx(EQ_I_12), count, buf0, buf1);
process_biquad(&eq_bq_14, biquadconstsfx(EQ_I_13), count, buf1, buf0);
process_biquad(&eq_bq_15, biquadconstsfx(EQ_I_14), count, buf0, buf1);
process_biquad(&eq_bq_16, biquadconstsfx(EQ_I_15), count, buf1, buf0);
process_biquad(&eq_bq_17, biquadconstsfx(EQ_I_16), count, buf0, buf1);
process_biquad(&eq_bq_18, biquadconstsfx(EQ_I_17), count, buf1, buf0);
*/
#endif
for (int i = 0; i < count * 2; i += 2) { // 25 us
if (actual_vol - VOL_STEP > vol_mul) actual_vol -= VOL_STEP;
Expand Down

0 comments on commit d2a9b16

Please sign in to comment.