quartiq · jordens · Jul 20, 2023 · Jul 12, 2023 · Jul 12, 2023 · Jul 12, 2023
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -5,6 +5,21 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## Unreleased
+
+### Changed
+
+* `filter` mod added to allow being generic about the `Filter` trait.
+  This is currently `i32 -> i32` filtering (SISO, no batches) only and
+  pretty simple but it allows filter composition, chaining, repetition,
+  and handles parameters/configuration.
+* `pll` reworked to use FMA instead of shifts. FMA is faster on the target
+  architecture and crucially important to increase dynamic range and reduce bias.
+  PLL now works fine even for very small feedback gains and maintains accuracy.
+* `lowpass` reworked to use the new `Filter` trait. Also reworked to use FMA
+  instead of shifts for greatly improved performance at low corner frequencies.
+  Second order lowpass added.
+
 ## [0.9.2](https://github.com/quartiq/idsp/compare/v0.9.1..v0.9.2) - 2022-11-27
 
 ### Changed

diff --git a/src/atan2.rs b/src/atan2.rs
@@ -15,10 +15,10 @@ fn atani(x: u32) -> u32 {
     const A: [i32; 6] = [
         0x0517c2cd,
         -0x06c6496b,
-        0x0fbdb026,
-        -0x25b32e58,
-        0x43b34e3c,
-        -0x3bc82700,
+        0x0fbdb021,
+        -0x25b32e0a,
+        0x43b34c81,
+        -0x3bc823dd,
     ];
     let x = x as i64;
     let x2 = ((x * x) >> 32) as i32 as i64;

diff --git a/src/filter.rs b/src/filter.rs
@@ -0,0 +1,70 @@
+pub trait Filter {
+    type Config;
+    /// Update the filter with a new sample.
+    ///
+    /// # Args
+    /// * `x`: Input data.
+    /// * `k`: Filter configuration.
+    ///
+    /// # Return
+    /// Filtered output y.
+    fn update(&mut self, x: i32, k: &Self::Config) -> i32;
+    /// Return the current filter output
+    fn get(&self) -> i32;
+    /// Update the filter so that it outputs the provided value.
+    /// This does not completely define the state of the filter.
+    fn set(&mut self, x: i32);
+}
+
+#[derive(Copy, Clone, Default)]
+pub struct Nyquist(pub(crate) i32);
+impl Filter for Nyquist {
+    type Config = ();
+    fn update(&mut self, x: i32, _k: &Self::Config) -> i32 {
+        let x = x >> 1; // x/2 for less bias but more distortion
+        let y = x.wrapping_add(self.0);
+        self.0 = x;
+        y
+    }
+    fn get(&self) -> i32 {
+        self.0
+    }
+    fn set(&mut self, x: i32) {
+        self.0 = x;
+    }
+}
+
+#[derive(Copy, Clone)]
+pub struct Chain<const N: usize, T>(pub(crate) [T; N]);
+impl<const N: usize, T: Filter> Filter for Chain<N, T> {
+    type Config = T::Config;
+    fn update(&mut self, x: i32, k: &Self::Config) -> i32 {
+        self.0.iter_mut().fold(x, |x, stage| stage.update(x, k))
+    }
+    fn get(&self) -> i32 {
+        self.0[N - 1].get()
+    }
+    fn set(&mut self, x: i32) {
+        self.0.iter_mut().for_each(|stage| stage.set(x));
+    }
+}
+impl<const N: usize, T: Default + Copy> Default for Chain<N, T> {
+    fn default() -> Self {
+        Self([T::default(); N])
+    }
+}
+
+#[derive(Copy, Clone, Default)]
+pub struct Cascade<T, U>(pub(crate) T, U);
+impl<T: Filter, U: Filter> Filter for Cascade<T, U> {
+    type Config = (T::Config, U::Config);
+    fn update(&mut self, x: i32, k: &Self::Config) -> i32 {
+        self.1.update(self.0.update(x, &k.0), &k.1)
+    }
+    fn get(&self) -> i32 {
+        self.1.get()
+    }
+    fn set(&mut self, x: i32) {
+        self.1.set(x)
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
@@ -6,6 +6,8 @@ mod atan2;
 pub use atan2::*;
 mod accu;
 pub use accu::*;
+mod filter;
+pub use filter::*;
 mod complex;
 pub use complex::*;
 mod cossin;

diff --git a/src/lockin.rs b/src/lockin.rs
@@ -1,13 +1,13 @@
-use super::{Complex, ComplexExt, Lowpass, MulScaled};
+use super::{Complex, ComplexExt, Filter, MulScaled};
 
 #[derive(Copy, Clone, Default)]
-pub struct Lockin<const N: usize> {
-    state: [Lowpass<N>; 2],
+pub struct Lockin<T> {
+    state: [T; 2],
 }
 
-impl<const N: usize> Lockin<N> {
+impl<T: Filter> Lockin<T> {
     /// Update the lockin with a sample taken at a local oscillator IQ value.
-    pub fn update_iq(&mut self, sample: i32, lo: Complex<i32>, k: u32) -> Complex<i32> {
+    pub fn update_iq(&mut self, sample: i32, lo: Complex<i32>, k: &T::Config) -> Complex<i32> {
         let mix = lo.mul_scaled(sample);
 
         // Filter with the IIR lowpass,
@@ -19,7 +19,7 @@ impl<const N: usize> Lockin<N> {
     }
 
     /// Update the lockin with a sample taken at a given phase.
-    pub fn update(&mut self, sample: i32, phase: i32, k: u32) -> Complex<i32> {
+    pub fn update(&mut self, sample: i32, phase: i32, k: &T::Config) -> Complex<i32> {
         // Get the LO signal for demodulation and mix the sample;
         self.update_iq(sample, Complex::from_angle(phase), k)
     }

diff --git a/src/lowpass.rs b/src/lowpass.rs
@@ -1,44 +1,71 @@
+use crate::Filter;
+
 /// Arbitrary order, high dynamic range, wide coefficient range,
 /// lowpass filter implementation. DC gain is 1.
 ///
-/// Type argument N is the filter order.
+/// Type argument N is the filter order. N must be `1` or `2`.
+///
+/// The filter will cleanly saturate towards the `i32` range.
+///
+/// The filter configuration `Config` contains its gains.
+///
+/// For the first-order lowpass this is the corner frequency in scaled Q31:
+/// `k = pi*(1 << 31)*f0/fn` where
+/// `f0` is the 3dB corner frequency and
+/// `fn` is the Nyquist frequency.
+/// The corner frequency is warped in the usual way.
+///
+/// For the second-order lowpass this is `[k**2/(1 << 32), -k/q]` with `q = 1/sqrt(2)`
+/// for a Butterworth response.
+/// In addition to the poles at the corner frequency, the filters have zeros at Nyquist.
+///
+/// The first-order lowpass works and is accurate for any positive gain
+/// `1 <= k <= (1 << 31) - 1`.
+/// The second-order lowpass works and is accurate for
+/// `1 << 16 <= k <= q*(1 << 31)`.
+///
+/// Both filters have been optimized for accuracy, dynamic range, and
+/// speed on Cortex-M7.
 #[derive(Copy, Clone)]
-pub struct Lowpass<const N: usize> {
-    // IIR state storage
-    y: [i32; N],
+pub struct Lowpass<const N: usize>(pub(crate) [i64; N]);
+impl<const N: usize> Filter for Lowpass<N> {
+    type Config = [i32; N];
+    fn update(&mut self, x: i32, k: &Self::Config) -> i32 {
+        let mut d = x.saturating_sub((self.0[0] >> 32) as i32) as i64 * k[0] as i64;
+        let y;
+        if N >= 2 {
+            d += (self.0[1] >> 32) * k[1] as i64;
+            self.0[1] += d;
+            self.0[0] += self.0[1];
+            y = self.get();
+            // This creates the double Nyquist zero,
+            // compensates the gain lost in the signed i32 as (i32 as i64)*(i64 >> 32)
+            // multiplication while keeping the lowest bit significant, and
+            // copes better with wrap-around than Nyquist averaging.
+            self.0[0] += self.0[1];
+            self.0[1] += d;
+        } else {
+            self.0[0] += d;
+            y = self.get();
+            self.0[0] += d;
+        }
+        y
+    }
+    fn get(&self) -> i32 {
+        (self.0[0] >> 32) as i32
+    }
+    fn set(&mut self, x: i32) {
+        self.0[0] = (x as i64) << 32;
+    }
 }
 
 impl<const N: usize> Default for Lowpass<N> {
     fn default() -> Self {
-        Lowpass { y: [0i32; N] }
+        Self([0; N])
     }
 }
 
-impl<const N: usize> Lowpass<N> {
-    /// Update the filter with a new sample.
-    ///
-    /// # Args
-    /// * `x`: Input data. Needs 1 bit headroom but will saturate cleanly beyond that.
-    /// * `k`: Log2 time constant, 1..=31.
-    ///
-    /// # Return
-    /// Filtered output y.
-    pub fn update(&mut self, x: i32, k: u32) -> i32 {
-        debug_assert!(k & 31 == k);
-        // This is an unrolled and optimized first-order IIR loop
-        // that works for all possible time constants.
-        // Note T-DF-I and the zeros at Nyquist.
-        let mut x = x;
-        for y in self.y.iter_mut() {
-            let dy = x.saturating_sub(*y) >> k;
-            *y += dy;
-            x = *y - (dy >> 1);
-        }
-        x.saturating_add((N as i32) << (k - 1).max(0))
-    }
-
-    /// Return the current filter output
-    pub fn output(&self) -> i32 {
-        self.y[N - 1]
-    }
-}
+/// First order lowpass
+pub type Lowpass1 = Lowpass<1>;
+/// Second order lowpass
+pub type Lowpass2 = Lowpass<2>;
diff --git a/src/pll.rs b/src/pll.rs
@@ -38,9 +38,9 @@ pub struct PLL {
     // last input phase
     x: i32,
     // filtered frequency
-    f: i32,
+    f: i64,
     // filtered output phase
-    y: i32,
+    y: i64,
 }
 
 impl PLL {
@@ -49,48 +49,39 @@ impl PLL {
     ///
     /// Args:
     /// * `x`: New input phase sample or None if a sample has been missed.
-    /// * `shift_frequency`: Frequency error scaling. The frequency gain per update is
-    ///   `1/(1 << shift_frequency)`.
-    /// * `shift_phase`: Phase error scaling. The phase gain is `1/(1 << shift_phase)`
-    ///   per update. A good value is typically `shift_frequency - 1`.
+    /// * `k`: Feedback gain.
     ///
     /// Returns:
     /// A tuple of instantaneous phase and frequency estimates.
-    pub fn update(&mut self, x: Option<i32>, shift_frequency: u32, shift_phase: u32) -> (i32, i32) {
-        debug_assert!((1..=30).contains(&shift_frequency));
-        debug_assert!((1..=30).contains(&shift_phase));
+    pub fn update(&mut self, x: Option<i32>, k: i32) -> (i32, i32) {
         if let Some(x) = x {
-            let df = (1i32 << (shift_frequency - 1))
-                .wrapping_add(x)
-                .wrapping_sub(self.x)
-                .wrapping_sub(self.f)
-                >> shift_frequency;
+            let dx = x.wrapping_sub(self.x);
             self.x = x;
+            let df = dx.wrapping_sub((self.f >> 32) as i32) as i64 * k as i64;
             self.f = self.f.wrapping_add(df);
-            let f = self.f.wrapping_sub(df >> 1);
-            self.y = self.y.wrapping_add(f);
-            let dy = (1i32 << (shift_phase - 1))
-                .wrapping_add(x)
-                .wrapping_sub(self.y)
-                >> shift_phase;
+            let f = (self.f >> 32) as i32;
+            self.y = self.y.wrapping_add(self.f);
+            self.f = self.f.wrapping_add(df);
+            let dy = x.wrapping_sub((self.y >> 32) as i32) as i64 * k as i64;
+            self.y = self.y.wrapping_add(dy);
+            let y = (self.y >> 32) as i32;
             self.y = self.y.wrapping_add(dy);
-            let y = self.y.wrapping_sub(dy >> 1);
-            (y, f.wrapping_add(dy))
+            (y, f)
         } else {
-            self.x = self.x.wrapping_add(self.f);
             self.y = self.y.wrapping_add(self.f);
-            (self.y, self.f)
+            self.x = self.x.wrapping_add((self.f >> 32) as i32);
+            ((self.y >> 32) as _, (self.f >> 32) as _)
         }
     }
 
     /// Return the current phase estimate
     pub fn phase(&self) -> i32 {
-        self.y
+        (self.y >> 32) as _
     }
 
     /// Return the current frequency estimate
     pub fn frequency(&self) -> i32 {
-        self.f
+        (self.f >> 32) as _
     }
 }
 
@@ -100,28 +91,27 @@ mod tests {
     #[test]
     fn mini() {
         let mut p = PLL::default();
-        let (y, f) = p.update(Some(0x10000), 8, 4);
-        assert_eq!(y, 0x87c);
-        assert_eq!(f, 0x1078);
+        let k = 1 << 24;
+        let (y, f) = p.update(Some(0x10000), k);
+        assert_eq!(y, 0x1ff);
+        assert_eq!(f, 0x100);
     }
 
     #[test]
     fn converge() {
         let mut p = PLL::default();
+        let k = 1 << 24;
         let f0 = 0x71f63049_i32;
-        let shift = (10, 9);
-        let n = 31 << shift.0 + 2;
+        let n = 1 << 14;
         let mut x = 0i32;
         for i in 0..n {
             x = x.wrapping_add(f0);
-            let (y, f) = p.update(Some(x), shift.0, shift.1);
+            let (y, f) = p.update(Some(x), k);
             if i > n / 4 {
-                // The remaining error would be removed by dithering.
-                assert_eq!(f.wrapping_sub(f0).abs() <= 1 << 10, true);
+                assert_eq!(f.wrapping_sub(f0).abs() <= 1, true);
             }
             if i > n / 2 {
-                // The remaining error would be removed by dithering.
-                assert_eq!(y.wrapping_sub(x).abs() < 1 << 18, true);
+                assert_eq!(y.wrapping_sub(x).abs() <= 1, true);
             }
         }
     }