From 119c0d2d9cf139f16f622c47a79bba866fcd11b7 Mon Sep 17 00:00:00 2001 From: Michael Herstine Date: Wed, 1 Jun 2022 07:00:07 -0700 Subject: [PATCH] First implementation of the `visualization` output plugin. This commit contains the first implementation of an output plugin that streams sound analysis to clients (presumably visualizers of some sort). --- .github/workflows/build.yml | 7 +- .github/workflows/build_android.yml | 4 +- meson_options.txt | 2 + src/lib/fmt/ThreadIdFormatter.hxx | 22 + src/output/Registry.cxx | 4 + src/output/plugins/meson.build | 18 + .../visualization/LowLevelProtocol.hxx | 57 ++ src/output/plugins/visualization/Protocol.cxx | 46 + src/output/plugins/visualization/Protocol.hxx | 138 +++ .../plugins/visualization/SoundAnalysis.cxx | 422 ++++++++ .../plugins/visualization/SoundAnalysis.hxx | 307 ++++++ .../plugins/visualization/SoundInfoCache.cxx | 244 +++++ .../plugins/visualization/SoundInfoCache.hxx | 104 ++ .../visualization/VisualizationClient.cxx | 534 ++++++++++ .../visualization/VisualizationClient.hxx | 273 +++++ .../VisualizationOutputPlugin.cxx | 712 +++++++++++++ .../VisualizationOutputPlugin.hxx | 147 +++ .../visualization/VisualizationServer.cxx | 106 ++ .../visualization/VisualizationServer.hxx | 119 +++ src/pcm/AudioFormat.hxx | 4 + test/TestVisualization.cxx | 953 ++++++++++++++++++ test/meson.build | 35 + test/run_vis.cxx | 336 ++++++ 23 files changed, 4590 insertions(+), 4 deletions(-) create mode 100644 src/lib/fmt/ThreadIdFormatter.hxx create mode 100644 src/output/plugins/visualization/LowLevelProtocol.hxx create mode 100644 src/output/plugins/visualization/Protocol.cxx create mode 100644 src/output/plugins/visualization/Protocol.hxx create mode 100644 src/output/plugins/visualization/SoundAnalysis.cxx create mode 100644 src/output/plugins/visualization/SoundAnalysis.hxx create mode 100644 src/output/plugins/visualization/SoundInfoCache.cxx create mode 100644 src/output/plugins/visualization/SoundInfoCache.hxx create mode 100644 src/output/plugins/visualization/VisualizationClient.cxx create mode 100644 src/output/plugins/visualization/VisualizationClient.hxx create mode 100644 src/output/plugins/visualization/VisualizationOutputPlugin.cxx create mode 100644 src/output/plugins/visualization/VisualizationOutputPlugin.hxx create mode 100644 src/output/plugins/visualization/VisualizationServer.cxx create mode 100644 src/output/plugins/visualization/VisualizationServer.hxx create mode 100644 test/TestVisualization.cxx create mode 100644 test/run_vis.cxx diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 241285bc8c..df7f342fe1 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -88,7 +88,8 @@ jobs: libupnp-dev \ libsqlite3-dev \ libchromaprint-dev \ - libgcrypt20-dev + libgcrypt20-dev \ + libfftw3-dev - name: Full Build uses: BSFishy/meson-build@v1.0.3 @@ -152,7 +153,8 @@ jobs: libvorbis \ faad2 \ wavpack \ - libmpdclient + libmpdclient \ + fftw - name: Build uses: BSFishy/meson-build@v1.0.3 @@ -193,6 +195,7 @@ jobs: dbus:p faad2:p ffmpeg:p + fftw:p fmt:p flac:p gtest:p diff --git a/.github/workflows/build_android.yml b/.github/workflows/build_android.yml index f17f2ca70e..2997c7b91e 100644 --- a/.github/workflows/build_android.yml +++ b/.github/workflows/build_android.yml @@ -40,7 +40,7 @@ jobs: sudo apt-get update sudo apt-get install -y --no-install-recommends \ ninja-build \ - quilt + quilt pip3 install --user meson==1.3.0 - name: Build @@ -49,7 +49,7 @@ jobs: cd ./output/android 
../../android/build.py $ANDROID_SDK_ROOT $ANDROID_NDK_LATEST_HOME arm64-v8a \ --buildtype=debugoptimized -Db_ndebug=true \ - -Dwrap_mode=forcefallback + -Dwrap_mode=forcefallback -Dvisualization=false cd - cd ./android diff --git a/meson_options.txt b/meson_options.txt index f887cb170a..5911d6f5af 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -186,6 +186,8 @@ option('shout', type: 'feature', description: 'Shoutcast streaming support using option('snapcast', type: 'boolean', value: true, description: 'Snapcast output plugin') option('sndio', type: 'feature', description: 'sndio output plugin') option('solaris_output', type: 'feature', description: 'Solaris /dev/audio support') +option('visualization', type: 'boolean', value: true, description: 'Visualization output plugin') +option('fftw3', type: 'feature', description: 'FFTW support') # # Misc libraries diff --git a/src/lib/fmt/ThreadIdFormatter.hxx b/src/lib/fmt/ThreadIdFormatter.hxx new file mode 100644 index 0000000000..a687ee86e3 --- /dev/null +++ b/src/lib/fmt/ThreadIdFormatter.hxx @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: BSD-2-Clause +// author: Max Kellermann + +#ifndef THREAD_ID_FORMATTER_HXX +#define THREAD_ID_FORMATTER_HXX + +#include +#include +#include + +template<> +struct fmt::formatter : formatter +{ + template + auto format(std::thread::id id, FormatContext &ctx) { + std::stringstream stm; + stm << id; + return formatter::format(stm.str(), ctx); + } +}; + +#endif // THREAD_ID_FORMATTER_HXX diff --git a/src/output/Registry.cxx b/src/output/Registry.cxx index 3dacd3e5ed..c17233daf9 100644 --- a/src/output/Registry.cxx +++ b/src/output/Registry.cxx @@ -23,6 +23,7 @@ #include "plugins/ShoutOutputPlugin.hxx" #include "plugins/sles/SlesOutputPlugin.hxx" #include "plugins/SolarisOutputPlugin.hxx" +#include "plugins/visualization/VisualizationOutputPlugin.hxx" #ifdef ENABLE_WINMM_OUTPUT #include "plugins/WinmmOutputPlugin.hxx" #endif @@ -89,6 +90,9 @@ constinit const AudioOutputPlugin *const audio_output_plugins[] = { #endif #ifdef ENABLE_WASAPI_OUTPUT &wasapi_output_plugin, +#endif +#ifdef ENABLE_VISUALIZATION_OUTPUT + &visualization_output_plugin, #endif nullptr }; diff --git a/src/output/plugins/meson.build b/src/output/plugins/meson.build index c790a52c09..38aa09cc4a 100644 --- a/src/output/plugins/meson.build +++ b/src/output/plugins/meson.build @@ -160,6 +160,24 @@ else wasapi_dep = dependency('', required: false) endif +libfftw3_dep = dependency('fftw3f', version: '>= 3.3.8', required: get_option('fftw3')) +output_features.set('ENABLE_FFTW3', libfftw3_dep.found()) +output_features.set('ENABLE_VISUALIZATION_OUTPUT', get_option('visualization')) +if get_option('visualization') + if not libfftw3_dep.found() + error('libfftw3 not available, but is required for the visualization plugin') + endif + output_plugins_sources += [ + 'visualization/VisualizationOutputPlugin.cxx', + 'visualization/SoundAnalysis.cxx', + 'visualization/SoundInfoCache.cxx', + 'visualization/VisualizationServer.cxx', + 'visualization/VisualizationClient.cxx', + 'visualization/Protocol.cxx', + ] + output_plugins_deps += [ event_dep, net_dep, libfftw3_dep ] +endif + output_plugins = static_library( 'output_plugins', output_plugins_sources, diff --git a/src/output/plugins/visualization/LowLevelProtocol.hxx b/src/output/plugins/visualization/LowLevelProtocol.hxx new file mode 100644 index 0000000000..f48f62f5f5 --- /dev/null +++ b/src/output/plugins/visualization/LowLevelProtocol.hxx @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0-or-later 
+// Copyright The Music Player Daemon Project + +#ifndef LOW_LEVEL_PROTOCOL_HXX_INCLUDED +#define LOW_LEVEL_PROTOCOL_HXX_INCLUDED + +#include "util/PackedBigEndian.hxx" + +#include + +#include +#include +#include + +namespace Visualization { + +/* Write a uint16_t to an output iterator over byte in wire format; return the + * iterator in its new position + */ +template +OutIter +SerializeU16(uint16_t n, OutIter pout) { + auto m = PackedBE16(n); + auto p = (std::byte*)(&m); + return std::copy(p, p + 2, pout); +} + +static_assert(std::numeric_limits::is_iec559); + +/* Convert an IEEE 754 single-precision floating-point number to wire format; + * write it to an output iterator & return the iterator in its new position + */ +template +OutIter +SerializeFloat(float f, OutIter pout) { + auto m = PackedBE32(*(uint32_t*)&f); + auto p = (std::byte*)(&m); + return std::copy(p, p + 4, pout); +} + +/* Convert an fftwf_complex to wire format; write it to an output iterator & + * return the iterator in its new position + */ +template +OutIter +SerializeComplex(const fftwf_complex c, OutIter pout) { + auto r = PackedBE32(*(const uint32_t*)&(c[0])); + auto i = PackedBE32(*(const uint32_t*)&(c[1])); + auto pr = (std::byte*)(&r); + auto pi = (std::byte*)(&i); + pout = std::copy(pr, pr + 4, pout); + return std::copy(pi, pi + 4, pout); +} + +} // namespace Visualization + +#endif // LOW_LEVEL_PROTOCOL_HXX_INCLUDED diff --git a/src/output/plugins/visualization/Protocol.cxx b/src/output/plugins/visualization/Protocol.cxx new file mode 100644 index 0000000000..94a76219d3 --- /dev/null +++ b/src/output/plugins/visualization/Protocol.cxx @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +// Copyright The Music Player Daemon Project + +#include "Protocol.hxx" + +#include "Log.hxx" +#include "util/ByteOrder.hxx" +#include "util/Domain.hxx" + +Visualization::ParseResult +Visualization::ParseClihlo(void *data, + size_t length, + ClientHello &clihlo) noexcept { + // CLIHLO payload is 6 bytes, header & footer are five more. + if (length < sizeof(ClientHello) + 5) { + return ParseResult::NEED_MORE_DATA; + } + + uint8_t *buf = (uint8_t *)data; + + uint16_t msg_type = FromBE16(*(uint16_t *)buf); + if (msg_type != 0) { + return ParseResult::ERROR; + } + + buf += 2; + uint16_t payload_len = FromBE16(*(uint16_t *)buf); + if (payload_len != 6) { + return ParseResult::ERROR; + } + + buf += 2; + clihlo.major_version = *buf++; + clihlo.minor_version = *buf++; + + clihlo.requested_fps = FromBE16(*(uint16_t *)(buf)); + buf += 2; + clihlo.tau = FromBE16(*(int16_t *)(buf)); + buf += 2; + + if (*buf != 0) { + return ParseResult::ERROR; + } + + return ParseResult::OK; +} diff --git a/src/output/plugins/visualization/Protocol.hxx b/src/output/plugins/visualization/Protocol.hxx new file mode 100644 index 0000000000..a2cdc1c399 --- /dev/null +++ b/src/output/plugins/visualization/Protocol.hxx @@ -0,0 +1,138 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +// Copyright The Music Player Daemon Project + +#ifndef VISUALIZATION_PROTOCOL_HXX_INCLUDED +#define VISUALIZATION_PROTOCOL_HXX_INCLUDED + +#include "LowLevelProtocol.hxx" +#include "SoundAnalysis.hxx" + +#include +#include + +namespace Visualization { + +/** + * \brief A parsed CLIHLO message + * + * \sa ParseCliHlo + * + * + * The visualization \ref vis_out_protocol "protocol" begins with the client + * connecting to the server & providing certain paramters of the sound analysis + * it would like to receive. 
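Aside: the LowLevelProtocol.hxx helpers above all follow one pattern-- write big-endian bytes through an output iterator. Here's a small standalone sketch of that pattern (hypothetical names, not the patch's code; it assumes IEEE 754 floats, as the static_assert above does) that behaves like SerializeU16/SerializeFloat when used with std::back_inserter:

	#include <cstddef>
	#include <cstdint>
	#include <cstring>
	#include <iterator>
	#include <vector>

	template <typename OutIter>
	OutIter PutU16BE(uint16_t n, OutIter out)
	{
		*out++ = std::byte(n >> 8);   // most-significant byte first
		*out++ = std::byte(n & 0xff);
		return out;
	}

	template <typename OutIter>
	OutIter PutFloatBE(float f, OutIter out)
	{
		uint32_t bits;
		std::memcpy(&bits, &f, sizeof bits); // well-defined type pun
		for (int shift = 24; shift >= 0; shift -= 8)
			*out++ = std::byte((bits >> shift) & 0xff);
		return out;
	}

	int main()
	{
		std::vector<std::byte> wire;
		auto it = std::back_inserter(wire);
		it = PutU16BE(0x0102, it);
		PutFloatBE(1.0f, it);
		// wire now holds 01 02 3f 80 00 00 -- network byte order
		return 0;
	}
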
That is done through the CLIHLO message (which see + * \a ref vis_out_protocol_proto_clihlo "here"). + * + * See \a vis_out_protocol_timing "timing" for details on parameter tau. + * + * + */ + +struct ClientHello { + /// Major protocol version the client would like to speak + uint8_t major_version; + /// Minor protocol version the client would like to speak + uint8_t minor_version; + /// The number of sound analyses per second the client would like to + /// receive (presumably the rate at which it is rendering frames, hence + /// the name "fps") + uint16_t requested_fps; + /// The desired offset (named "tau" in the documentation) between song + /// time and analysis time at each analysis performed + int16_t tau; +}; + +enum class ParseResult { + OK, + NEED_MORE_DATA, + ERROR, +}; + +/** + * \brief Attempt to parse a \ref vis_out_protocol_proto_clihlo "CLIHLO" message + * from the given buffer + * + * \param buf [in] An array of octets potentially containing the message + * + * \param length [in] The length of \a buf, in octets + * + * \param clihlo [out] A reference to a `client_hello_t` structure to be + * filled-in on successful execution + * + * \return ParseResult::OK if the message was successfully parsed, + * NEED_MORE_DATA if the message is incomplete, or ERROR if the message cannot + * be ready from \a buf + * + * + * CLIHLO is the first message in the protocol, sent by the client. See + * \ref vis_out_protocol_proto_clihlo "the protocol specification" for details, + * and \ref vis_out_protocol "Visualization Network Protocol" for discussion + * of the protocol altogether. + * + * + */ + +ParseResult +ParseClihlo(void *buf, size_t length, ClientHello &clihlo) noexcept; + +/// Serialize an SRVHLO message to wire format +template +void +SerializeSrvhlo(std::byte major_ver, std::byte minor_ver, OutIter pout) { + using std::byte; + + *pout++ = byte{0}; // + *pout++ = byte{1}; // message type + *pout++ = byte{0}; // + *pout++ = byte{2}; // payload length + *pout++ = major_ver; + *pout++ = minor_ver; + *pout++ = byte{0}; // check byte +} + +/// Serialize a FRAME message header to wire format +template +OutIter +SerializeSoundInfoFrameHeader(uint8_t num_chan, + size_t num_samp, + size_t num_freq, + OutIter pout) { + + using std::byte; + + // Start with the "magic number" allowing clients to "lock on" to the + // stream of sound info frames in the event of an error. + // See \ref vis_out_protocol_proto_msgs for details. + *pout++ = byte{0x63}; + *pout++ = byte{0xac}; + *pout++ = byte{0x84}; + *pout++ = byte{0x03}; + + *pout++ = byte{16}; + *pout++ = byte{0}; + + return SerializeU16(17 + 4 * num_chan * (num_samp + 3 * num_freq + 3), + pout); +} + +/// Serialize a FRAME message payload to wire format +template +void +SerializeSoundInfoFrameFooter(OutIter pout) { + *pout = std::byte{0x00}; +} + +/// Serialize a FRAME message to wire format +template +void +SerializeSoundInfoFrame(const Visualization::SoundAnalysis &a, + OutIter pout) { + pout = SerializeSoundInfoFrameHeader(a.NumChan(), a.NumSamp(), + a.NumFreq(), pout); + pout = a.SerializeSoundInfoFramePayload(pout); + SerializeSoundInfoFrameFooter(pout); +} + +} // namespace Visualization. 
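For reference while reading ParseClihlo() above, here is a hypothetical client-side construction of the 11-octet CLIHLO message that parser accepts (type 0, 6-octet payload, zero check byte). Field widths and byte order are taken from the parse code itself, not from any external spec:

	#include <array>
	#include <cstdint>

	std::array<uint8_t, 11>
	MakeClihlo(uint8_t major, uint8_t minor, uint16_t fps, int16_t tau)
	{
		std::array<uint8_t, 11> m{};
		m[0] = 0; m[1] = 0;                 // message type: CLIHLO (0)
		m[2] = 0; m[3] = 6;                 // payload length: 6 octets
		m[4] = major;                       // requested protocol version
		m[5] = minor;
		m[6] = uint8_t(fps >> 8);           // requested fps, big-endian
		m[7] = uint8_t(fps & 0xff);
		m[8] = uint8_t(uint16_t(tau) >> 8); // tau (ms), big-endian
		m[9] = uint8_t(uint16_t(tau) & 0xff);
		m[10] = 0;                          // check byte
		return m;
	}
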
+
+#endif // VISUALIZATION_PROTOCOL_HXX_INCLUDED
diff --git a/src/output/plugins/visualization/SoundAnalysis.cxx b/src/output/plugins/visualization/SoundAnalysis.cxx
new file mode 100644
index 0000000000..fc99aa660a
--- /dev/null
+++ b/src/output/plugins/visualization/SoundAnalysis.cxx
@@ -0,0 +1,422 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+// Copyright The Music Player Daemon Project
+
+#include "SoundAnalysis.hxx"
+
+#include "Log.hxx"
+#include "config/Block.hxx"
+#include "lib/fmt/AudioFormatFormatter.hxx"
+#include "lib/fmt/RuntimeError.hxx"
+#include "lib/fmt/ThreadIdFormatter.hxx"
+#include "pcm/FloatConvert.hxx"
+#include "util/Domain.hxx"
+
+#include <cmath>
+
+const Domain d_sound_analysis("sound_analysis");
+
+Visualization::SoundAnalysisParameters::SoundAnalysisParameters() noexcept
+: SoundAnalysisParameters(DEFAULT_NUM_SAMPLES, DEFAULT_LO_CUTOFF, DEFAULT_HI_CUTOFF)
+{ }
+
+Visualization::SoundAnalysisParameters::SoundAnalysisParameters(
+	const ConfigBlock &config_block)
+: SoundAnalysisParameters(
+	config_block.GetPositiveValue("num_samples", DEFAULT_NUM_SAMPLES),
+	config_block.GetPositiveValue("lo_cutoff", DEFAULT_LO_CUTOFF),
+	config_block.GetPositiveValue("hi_cutoff", DEFAULT_HI_CUTOFF))
+{ }
+
+Visualization::SoundAnalysisParameters::SoundAnalysisParameters(
+	size_t num_samples_in,
+	float lo_cutoff_in,
+	float hi_cutoff_in):
+	num_samples(num_samples_in),
+	lo_cutoff(lo_cutoff_in), hi_cutoff(hi_cutoff_in)
+{
+	if (lo_cutoff >= hi_cutoff) {
+		throw FmtRuntimeError(
+			"lo_cutoff ({}) must be less than hi_cutoff ({})",
+			lo_cutoff, hi_cutoff);
+	}
+}
+
+/**
+ * \page vis_out_dft The Discrete Fourier Transform & Frequency Analysis
+ *
+ * \section vis_out_dft_intro Introduction
+ *
+ * This page contains some notes on the Discrete Fourier Transform as applied
+ * to music. They are a combination of dimly-remembered mathematics from
+ * university, source code comments from the milkdrop Winamp visualization
+ * plug-in, and the fftw documentation.
+ *
+ * \section vis_out_dft_basics The Basics
+ *
+ * The first thing to note is that the human ear can perceive sounds in the
+ * range 200 - 20,000Hz. For visualization purposes, implementations tend to
+ * throw away frequency data above 10,000Hz or so since there's not much
+ * activity there (something I've observed myself).
+ *
+ * Perceptually, frequency is not linear, it's logarithmic. A change of one
+ * octave corresponds to a doubling in frequency. Intuitively, this means that
+ * the difference between, say, 200 & 300Hz is \em much greater than the
+ * difference between 5000 & 5100Hz, for example.
+ *
+ * \subsection vis_out_dft_dft The Discrete Fourier Transform
+ *
+ * Given \c n audio samples, sampled at a frequency of \c F Hz, the DFT
+ * computes \c n complex numbers, each of which corresponds to the frequency:
+ *
+ \code
+             k * F
+     freq  = -----, k=0,...,n-1
+         k     n
+
+ \endcode
+ *
+ * (see
+ * here).
+ *
+ * The DFT library I'm using (fftw AKA "The
+ * Fastest Fourier Transform in the West") takes advantage of the Hermitian
+ * property of the Fourier Transform of real data in which the k-th Fourier
+ * coefficient is the complex conjugate of the (n-k)-th coefficient and only
+ * returns the first n/2+1 Fourier coefficients (i.e. indices 0 to n/2,
+ * inclusive) to save time & space. See
+ * here.
+ *
+ * Therefore, the first Fourier coefficient returned corresponds to 0Hz, and
+ * the last to:
+ *
+ \code
+               n
+               - * F
+               2       F
+     freq    = ----- = -
+         n/2     n     2
+ \endcode
+ *
+ * or half the sampling frequency. (A small standalone sketch of this
+ * bin-to-frequency mapping follows.)
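To make the bin-to-frequency mapping concrete, here's a minimal standalone sketch (not part of the patch itself) that prints the frequency of each coefficient an r2c transform returns; n = 576 and F = 44100 are just the worked example's numbers below:

	#include <cstdio>

	int main()
	{
		const int n = 576;         // samples per analysis (example value)
		const float F = 44100.0f;  // sample rate, Hz
		const int out = n / 2 + 1; // coefficients an r2c DFT returns
		for (int k = 0; k < out; ++k)
			std::printf("bin %3d -> %8.1f Hz\n", k, k * F / n);
		// bin 0 is DC; bin n/2 is F/2 = 22050 Hz, the Nyquist frequency
		return 0;
	}
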
+ *
+ * \subsection vis_out_dft_buckets How To Bucket Frequencies
+ *
+ * To divide frequency data into \c N bands (whether for a scheme like bass/
+ * mids/trebs, or into a number of bars for visualization purposes), consider
+ * your frequency range & compute the number of octaves therein. If we let \c n
+ * be the number of octaves, then we know:
+ *
+ \code
+      n     freq_hi            log(freq_hi/freq_lo)
+     2  := -------   =>   n =  --------------------
+            freq_lo                   log(2)
+ \endcode
+ *
+ * The \c N bands will then be:
+ *
+ \code
+                        n/N
+     freq_lo...freq_lo * 2
+
+                n/N             2*n/N
+     freq_lo * 2   ...freq_lo * 2
+
+     ...
+
+                (N-1)*n/N                n
+     freq_lo * 2         ... freq_lo * 2
+
+ \endcode
+ *
+ * \subsection vis_out_dft_eg Worked Example
+ *
+ * Let the number of samples n be 576. This means our DFT will return n/2 + 1 =
+ * 289 complex numbers. Let our sampling frequency F be 44,100Hz. For each k,
+ * k=0,...,288, the corresponding frequency will be k * 44100/576, giving us a
+ * frequency range of 0Hz to 22,050Hz. Let's clamp that to 200-11,000, compute
+ * the power spectrum, and divide that power up into three bands: bass, mids &
+ * trebs.
+ *
+ * First, we need to find the indices into the DFT corresponding to our
+ * desired frequency range.
+ *
+ \code
+          k * F              f * n
+     f  = -----  ==>  k  =  ------, where f  := the frequency of the k-th
+      k     n                  F           k    Fourier coefficient
+
+          | 200 * 576 |
+     k0 = | --------- | = floor(2.61...) = 2
+          |   44100   |
+          -           -
+
+          -             -
+          | 11000 * 576 |
+     k1 = | ----------- | = ceil(143.67...) = 144
+          |    44100    |
+
+ \endcode
+ *
+ * So the power spectrum will have 144 - 2 + 1 = 143 elements in it. Nb. we're
+ * throwing away roughly the upper half of our frequency spectrum.
+ *
+ * To divide these frequencies into three bands such that each band contains
+ * the same number of octaves, we compute how many octaves there are in our
+ * frequency range (call this \c n):
+ *
+ \code
+      n    11000            log(11000/200)
+     2  =  -----   =>   n = --------------  = 5.7814
+            200                 log(2)
+ \endcode
+ *
+ * In other words, there are 5.7814 octaves in our chosen frequency range. We
+ * want to give each band 5.7814/3 = 1.9271 octaves. That means the three
+ * "buckets" will be:
+ *
+ \code
+                    1.9271
+     200 ... 200 * 2        or   200 - 761Hz
+
+            1.9271            2*1.9271
+     200 * 2       ... 200 * 2         or   761 - 2,892Hz
+
+            2*1.9271            5.7814
+     200 * 2         ... 200 * 2       or   2,892 - 11,000Hz
+
+ \endcode
+ *
+ *
+ */
+
+Visualization::SoundAnalysis::SoundAnalysis(
+	const SoundAnalysisParameters &params,
+	std::shared_ptr<SoundInfoCache> pc)
+: num_samples(params.GetNumSamples()),
+  out_samples((num_samples / 2) + 1),
+  pcache(pc),
+  audio_format(pc->GetFormat()),
+  num_channels(audio_format.channels),
+  cbbuf(params.GetNumSamples() * audio_format.GetFrameSize()),
+  buf(new std::byte[cbbuf]),
+  in(fftwf_alloc_real(num_samples * num_channels), fftwf_free),
+  out(fftwf_alloc_complex(out_samples * num_channels), fftwf_free),
+  bass_mids_trebs(new float[3 * num_channels])
+{
+	if (num_samples > INT_MAX) {
+		throw FmtInvalidArgument(
+			"num_samples({}) may not be larger than {}",
+			num_samples, INT_MAX);
+	}
+
+	int n[] = { (int)num_samples };
+
+	/* The input is assumed to be interleaved; this seems convenient from
+	 * the perspective of how it's stored from the AudioOutput... though if
+	 * we need an additional copy to convert it to `float`, we'd have the
+	 * opportunity to re-arrange it.
*/ + + int dist = num_samples; + + /* Per the FFTW docs: + * + * "`rank` is the rank of the transform (it should be the size of the + * array `*n`) we use the term rank to denote the number of independent + * indices in an array. For example, we say that a 2d transform has rank + * 2, a 3d transform has rank 3, and so on." + * + * This is always 1, for us. + * + * layout of `in`: + * + * | 0 ... num_samples-1 | num_samples ... 2*num_samples-1 | 2*num_samples ... + * | data for chan 0 | data for chan 1 | data for chan 2 */ + + /* `howmany` is the number of transforms to compute. The resulting plan + * computes `howmany` transforms, where the input of the k-th transform + * is at location in+k*idist (in C pointer arithmetic), and its output + * is at location out+k*odist. */ + + int odist = (num_samples / 2) + 1; + + plan = fftwf_plan_many_dft_r2c(1, // rank of the input array-- we have one-dimensional arrays + n, // the number of elements in each array + num_channels, // one array for each channel + in.get(), // input buffer-- need to copy samples here before executing + NULL, + 1, // input stride + dist, // distance between successive arrays (indexes, not bytes) + out.get(), // output buffer-- overwritten on each execution + NULL, + 1, // output stride + odist, // distance between successive arrays (indexes, not bytes) + FFTW_ESTIMATE);// should probably be zero (to select a more exhaustive + // search), but out of an abundance of caution, tell + // FFTW to pick a plan quickly + if (NULL == plan) { + throw FmtRuntimeError("Failed to generate an FFTW plan: " + "num_samp={},num_chan={}", + num_samples, num_channels); + } + + freq_lo = params.GetLoCutoff(); + + float samples_per_sec = (float) audio_format.GetSampleRate(); + float ns = (float) num_samples; + // The highest frequency we can represent will be + float max_freq = (ns - 1.0f) * samples_per_sec / ns; + if (max_freq < params.GetHiCutoff()) { + FmtWarning(d_sound_analysis, + "Clamping max frequency from {} to {}", + freq_hi, max_freq); + freq_hi = max_freq; + } else { + freq_hi = params.GetHiCutoff(); + } + + idx_lo = (size_t)floorf(freq_lo * + (float) num_samples / samples_per_sec ); + idx_hi = (size_t) ceilf(freq_hi * (float)num_samples / samples_per_sec); + + float num_octaves = logf(freq_hi/freq_lo) / 0.69314718f; + + float freq_mids = freq_lo * powf(2.0f, num_octaves / 3.0f); + float freq_trebs = freq_lo * powf(2.0f, 2.0f * num_octaves / 3.0f); + + idx_mids = ns * freq_mids / samples_per_sec; + idx_trebs = ns * freq_trebs / samples_per_sec; +} + +bool +Visualization::SoundAnalysis::Update(SoundInfoCache::Time t) noexcept +{ + FmtDebug(d_sound_analysis, "SoundAnalysis::update(tid: {}), time {}us, " + "# samp: {}, buffer size: {}", std::this_thread::get_id(), + duration_cast(t.time_since_epoch()).count(), + num_samples, pcache->Size()); + + if (!pcache->GetByTime(num_samples, t, buf.get(), cbbuf)) { + FmtWarning(d_sound_analysis, "Failed to get samples by time " + "for sound analysis ({} samples requested, at " + "time {}us for buf size {}).", num_samples, + duration_cast(t.time_since_epoch()).count(), + cbbuf); + return false; + } + + /* Copy the raw PCM data from `buf` into `in`. I hate this, but we need + * to convert the input data from `uint16_t` (or whatever) to `float` + * regardless. We could, of course, do the conversion when the PCM data + * is added to the cache, but since I anticipate processing far fewer + * samples than I store, I expect this to be more efficient (both in + * terms of time & space). 
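The equal-octave band split the constructor performs above (the freq_mids / freq_trebs computation) can be checked with a few lines of standalone C++; this is just the worked example's arithmetic, not code from the patch:

	#include <cmath>
	#include <cstdio>

	int main()
	{
		const float lo = 200.0f, hi = 11000.0f;
		// number of octaves spanned by [lo, hi]
		const float octaves = std::log2(hi / lo);                       // ~5.7814
		// band edges giving each of the three bands octaves/3 octaves
		const float mids  = lo * std::pow(2.0f, octaves / 3.0f);        // ~761 Hz
		const float trebs = lo * std::pow(2.0f, 2.0f * octaves / 3.0f); // ~2892 Hz
		std::printf("bass %.0f-%.0f, mids %.0f-%.0f, trebs %.0f-%.0f Hz\n",
			    lo, mids, mids, trebs, trebs, hi);
		return 0;
	}
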
+ * + * Since we have to do the copy anyway, let's convert from interleaved + * to sequential (i.e. all samples for the first channel laid-out + * contiguously, followed by all from the second, and so forth). */ + typedef IntegerToFloatSampleConvert S8Cvt; + typedef IntegerToFloatSampleConvert S16Cvt; + typedef IntegerToFloatSampleConvert S32Cvt; + typedef IntegerToFloatSampleConvert S24P32; + + for (size_t i = 0; i < num_samples; ++i) { + for (size_t j = 0; j < num_channels; ++j) { + /* `buf` index: i * num_channels + j + * `in` index: j * num_samples + i */ + float x; + switch (audio_format.format) { + case SampleFormat::S8: + x = S8Cvt::Convert( + *(int8_t*)buf[i * num_channels + j]); + break; + case SampleFormat::S16: + x = S16Cvt::Convert( + *(int16_t*) (buf.get() + + 2 * (i*num_channels + j))); + break; + case SampleFormat::S32: + x = S32Cvt::Convert( + *(int32_t*)(buf.get() + + 4 * (i*num_channels + j))); + break; + case SampleFormat::FLOAT: + x = *(float*)(buf.get() + + 4 * (i * num_channels + j)); + break; + case SampleFormat::S24_P32: + /* signed 24 bit integer samples, packed in 32 + * bit integers (the most significant byte is + * filled with the sign bit) */ + x = S24P32::Convert( + *(int32_t *)(buf.get() + + 4 * (i*num_channels + j))); + break; + default: + assert(false); + } + in.get()[j * num_samples + i] = x; + } + } + + fftwf_execute(plan); + + size_t max_coeffs_idx = num_samples/2; + + for (unsigned c = 0; c < num_channels; ++c) { + + bass_mids_trebs[3 * c] = bass_mids_trebs[3 * c + 1] = + bass_mids_trebs[3*c+2] = 0.0f; + + // walk [idx_lo, idx_hi) + for (size_t i = idx_lo; i < idx_hi; ++i) { + size_t j = i; + if (j > max_coeffs_idx) { + j = num_samples - j; + } + fftwf_complex *pout = + out.get() + c * (max_coeffs_idx + 1); + float contrib = sqrt( + pout[j][0]*pout[j][0] + pout[j][1]*pout[j][1]); + if (i < idx_mids) { + bass_mids_trebs[3*c] += contrib; + } else if (i < idx_trebs) { + bass_mids_trebs[3*c + 1] += contrib; + } else { + bass_mids_trebs[3*c + 2] += contrib; + } + } + } + + return true; +} + +bool +Visualization::SoundAnalysis::GetCoeffs(fftwf_complex *coeffs, + size_t num_complex) const noexcept { + if (num_complex < out_samples * num_channels) { + return false; + } + + /* Would prefer to use `std::copy`, but fftw regrettably defines + * `fftwf_complex` as `float[2]` which confuses it. */ + memcpy(coeffs, out.get(), + out_samples * num_channels * sizeof(fftwf_complex)); + + return true; +} + +bool +Visualization::SoundAnalysis::GetBassMidsTrebs(float *buf_out, + size_t num_buf) const { + + if (num_buf < 3 * num_channels) { + return false; + } + + std::copy(bass_mids_trebs.get(), + bass_mids_trebs.get() + 3 * num_channels, + buf_out); + return true; +} diff --git a/src/output/plugins/visualization/SoundAnalysis.hxx b/src/output/plugins/visualization/SoundAnalysis.hxx new file mode 100644 index 0000000000..92db0d600c --- /dev/null +++ b/src/output/plugins/visualization/SoundAnalysis.hxx @@ -0,0 +1,307 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +// Copyright The Music Player Daemon Project + +#ifndef SOUND_ANALYSIS_HXX_INCLUDED +#define SOUND_ANALYSIS_HXX_INCLUDED 1 + +#include "SoundInfoCache.hxx" +#include "LowLevelProtocol.hxx" + +#include + +#include +#include +#include + +#include + +struct ConfigBlock; + +namespace Visualization { + +/** + * \brief Convenience class for expressing sound analysis parameters exclusive + * of the audio format + * + * + * There are any number of parameters governing our analysis of PCM data. 
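Here's a standalone sketch of the interleaved-to-planar conversion performed in Update() above (names hypothetical); 16-bit samples are scaled to [-1, 1), which is what I understand the plugin's IntegerToFloatSampleConvert to do:

	#include <cstddef>
	#include <cstdint>
	#include <vector>

	std::vector<float>
	Deinterleave(const std::vector<int16_t> &pcm, unsigned channels)
	{
		const std::size_t frames = pcm.size() / channels;
		std::vector<float> planar(pcm.size());
		for (std::size_t i = 0; i < frames; ++i)        // frame index
			for (unsigned j = 0; j < channels; ++j) // channel index
				// interleaved i*channels + j -> planar j*frames + i
				planar[j * frames + i] =
					pcm[i * channels + j] / 32768.0f;
		return planar;
	}
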
Other + * than the `AudioFormat`, they are read from configuration at startup and + * constant. Rather than force callers to write methods taking many parameters, + * this class colects them all in one place, and enforces some constraints on + * their values. + * + * + */ + +class SoundAnalysisParameters +{ + /* The number of samples used for each analysis; this must be greater + * than zero and needn't be large (say, less than 1024). Configuration + * value "num_samples" */ + size_t num_samples; + /* Data lower than this frequency (in the frequency domain) shall be + * discarded; must be greater than or equal to zero, and less than + * hi_cutoff. A typical value would be 200 (the lower range of human + * perception). Units: Hz. Configuration value "lo_cutoff" */ + float lo_cutoff; + /* Data greater than this frequency (in the frequency domain) shall be + * discarded; must be greater than or equal to zero, and greater than + * lo_cutoff. A typical value would be 10000-12000 (empirically, there's + * not a lot of activity above 10000 in song data). Units + * Hz. Configuration value "hi_cutoff" */ + float hi_cutoff; + + static constexpr size_t DEFAULT_NUM_SAMPLES = 513; + static constexpr size_t DEFAULT_LO_CUTOFF = 200; + static constexpr size_t DEFAULT_HI_CUTOFF = 10000; + +public: + SoundAnalysisParameters() noexcept; + explicit SoundAnalysisParameters(const ConfigBlock &config_block); + SoundAnalysisParameters(size_t num_samples, float lo_cutoff, + float hi_cutoff); + + size_t + GetNumSamples() const noexcept { + return num_samples; + } + float + GetLoCutoff() const noexcept { + return lo_cutoff; + } + float + GetHiCutoff() const noexcept { + return hi_cutoff; + } +}; + +/** + * \class SoundAnalysis + * + * \brief Analayze PCM data in a manner convienient for visualization authors + * + * + * This class houses our logic for going from raw PCM data to the power + * spectrum, bass/mids/trebs &c. Instances are constructed with configuration + * information on the analysis details, and repeated analysis for different + * datasets is performed via update(). Since instances allocate input & output + * buffers for the discrete Fourier transform, they are not meant to be copied + * around. + * + * + */ + +class SoundAnalysis { + + /// # of samples to be used in each analysis + size_t num_samples; + /// # of Fourier coefficients computed by FFTW (should be (num_samples / + /// # 2) + 1) + size_t out_samples; + std::shared_ptr pcache; + AudioFormat audio_format; + /* # of audio channels (e.g. 1 is mono, 2 is stereo-- + * # cf. SampleFormat.hxx); should be audio_format.num_channels */ + uint8_t num_channels; + /// Size of `buf`, in bytes + size_t cbbuf; + /// Pre-allocated buffer for raw PCM data + std::unique_ptr buf; + /// Input array for all FFTs performed by this `SoundAnalysis` instance + std::unique_ptr in; + /// Output array for all FFTs performed by this `SoundAnalysis` instance + std::unique_ptr out; + /* Pre-computed (by fftw) information on the fastest way to compute the + * Discrete Fourier Transform on the underlying hardware */ + fftwf_plan plan; + /* Frequency cutoffs, in Hz; we'll return frequencies in the range + [freq_lo, freq_hi] */ + float freq_lo, freq_hi; + /* Indicies into `out` corresponding to the desired frequency range; + * that range is indexed by [index_lo, index_hi) */ + size_t idx_lo, idx_hi; + /// Indicies into `out` corresponding "mids" & "trebs" + size_t idx_mids, idx_trebs; + /* Bass/mids/trebs, laid-out as [bass, mids, trebs, bass, mids, trebs] + * (i.e. 
3 * num_channels floats) */ + std::unique_ptr bass_mids_trebs; + +public: + SoundAnalysis(const SoundAnalysisParameters ¶ms, + std::shared_ptr pc); + + uint8_t + NumChan() const noexcept { + return num_channels; + } + /// Return the number of audio samples, per channel, used in each analysis + size_t + NumSamp() const noexcept { + return num_samples; + } + /* Return the number of Fourier coefficients & power spectrum values + * returned, per channel; this is determined by the number of samples and + * the frequency cutoffs */ + size_t + NumFreq() const noexcept { + return idx_hi - idx_lo; + } + + /// Update the current analysis to be current as of time \a t + bool Update(SoundInfoCache::Time t) noexcept; + + /* Return the first half of the Fourier coefficients (bearing in mind + * that the Hermitian property means we only need to deal with the first + * nsamp/2 + 1) with no frequency cutoffs. Mostly used for testing */ + bool GetCoeffs(fftwf_complex *coeffs, + size_t num_complex) const noexcept; + bool GetBassMidsTrebs(float *buf, size_t num_buf) const; + + ///////////////////////////////////////////////////////////////////////// + // Serialization Support // + ///////////////////////////////////////////////////////////////////////// + + /* Write the waveforms used in the current analysis to \a pout; return + * the updated iterator. The waveforms will be written as per the + * \ref vis_out_protocol_proto_frame "protocol spec". + */ + template + OutIter + SerializeWaveforms(OutIter pout) const { + const float *pin = in.get(); + for (size_t j = 0; j < num_channels; ++j) { + for (size_t i = 0; i < num_samples; ++i) { + pout = SerializeFloat(pin[j * num_samples + i], + pout); + } + } + return pout; + } + + /* Write the frequency coefficients that resulted from the current analysis + * subject to frequency cutoffs to \a pout; return the updated + * iterator. The coefficients will be written as per the + * \ref vis_out_protocol_proto_frame "protocol spec". */ + template + OutIter + SerializeCoefficients(OutIter pout) const { + return TransformCoeffs(pout, SerializeComplex); + } + + /* Write the magnitude of a complex number (presumably a Fourier + * coefficient) to \a pout; return the updated iterator. The magnitude will + * be written as per the \ref vis_out_protocol_proto_frame "protocol spec". */ + template + static + OutIter + SerializeSpectrum(const fftwf_complex c, OutIter pout) { + return SerializeFloat(sqrt(c[0] * c[0] + c[1] * c[1]), pout); + } + + /* Write the power spectrum that resulted from the current analysis to \a + * pout; return the updated iterator. The power spectrum will be written as + * per the \ref vis_out_protocol_proto_frame "protocol spec". */ + template + OutIter + SerializePowerSpectra(OutIter pout) const { + return TransformCoeffs(pout, SerializeSpectrum); + } + + /* Write the bass/mids/trebs values that resulted from the current analysis + * to \a pout; return the updated iterator. The values will be written as + * per the \ref vis_out_protocol_proto_frame "protocol spec". */ + template + OutIter + SerializeBassMidsTrebs(OutIter pout) const { + float *bmt = bass_mids_trebs.get(); + for (size_t i = 0; i < num_channels; ++i) { + pout = SerializeFloat(bmt[3 * i], pout); + pout = SerializeFloat(bmt[3 * i + 1], pout); + pout = SerializeFloat(bmt[3 * i + 2], pout); + } + return pout; + } + + /* Write the payload of a \c FRAME message to \a pout; return the updated + * iterator. The payload will be written as per the + * \ref vis_out_protocol_proto_frame "protocol spec". 
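The FRAME payload length used by SerializeSoundInfoFrameHeader() earlier (17 + 4 * num_chan * (num_samp + 3 * num_freq + 3)) can be derived from the fields SerializeSoundInfoFramePayload() writes; a standalone cross-check, assuming 4-byte floats on the wire as the protocol specifies:

	#include <cstddef>

	constexpr std::size_t
	FramePayloadBytes(std::size_t chan, std::size_t samp, std::size_t freq)
	{
		return 2 + 1 + 2           // num_samp, num_chan, sample rate
		     + 4 * chan * samp     // waveforms, one float per sample
		     + 2 + 4 + 4 + 2       // num_freq, freq_lo/hi, idx_lo
		     + 8 * chan * freq     // complex Fourier coefficients
		     + 4 * chan * freq     // power spectrum
		     + 4 * chan * 3;       // bass/mids/trebs
	}

	// agrees with the header's formula for the worked example's numbers
	static_assert(FramePayloadBytes(2, 576, 143) ==
		      17 + 4 * 2 * (576 + 3 * 143 + 3), "payload size mismatch");
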
*/ + template + OutIter + SerializeSoundInfoFramePayload(OutIter pout) const { + pout = SerializeU16(num_samples, pout); + *pout++ = (std::byte) num_channels; + pout = SerializeU16(audio_format.GetSampleRate(), pout); + pout = SerializeWaveforms(pout); + pout = SerializeU16(NumFreq(), pout); + pout = SerializeFloat(freq_lo, pout); + pout = SerializeFloat(freq_hi, pout); + pout = SerializeU16(idx_lo, pout); + pout = SerializeCoefficients(pout); + pout = SerializePowerSpectra(pout); + pout = SerializeBassMidsTrebs(pout); + return pout; + } + + /* Write the Fourier coefficients in the range `[idx_lo, idx_hi)` to + * \a pout first transforming them by \a op. */ + template + OutIter + TransformCoeffs( + OutIter pout, + OutIter (*op)(const fftwf_complex, OutIter pout)) const { + + /* We wish to serialize the Fourier cofficients [idx_lo, + * idx_hi), transformed by `op`. The issue is that `out` stores + * the coefficients [0, num_samples/2 + 1), so we need to + * tranform the indexing operation. */ + const fftwf_complex *po = out.get(); + + // The # of frequencies stored in `out` per channel + size_t total_freq_per_chan = num_samples / 2 + 1; + + // The maximum indexable frequency per channel + size_t upper_freq_per_chan = + std::min(idx_hi, total_freq_per_chan); + + /* In both `out` & `pout`, the coefficients are laid out as: | + * coeffs for chan #0... | coeffs for chan #1... | ... | + * so outer loop will be on channel. */ + for (unsigned chan = 0; chan < num_channels; ++chan) { + + /* This is the index into `out` of the very first + * Fourier coefficient for this channel. */ + size_t first_freq_this_chan = + chan * total_freq_per_chan; + /* Beginning from here, we wan to walk the indicies: + * [idx_lo, upper_freq_per_chan) + * This will take us from the "low" frequency index up + * to num_samp/2 + 1 or idx_hi, whichever is least. */ + for (size_t i = first_freq_this_chan + idx_lo; + i < first_freq_this_chan + upper_freq_per_chan; + ++i) { + pout = op(po[i], pout); + } + /* *If* idx_hi is greater than num_samp/2+1, walk back + * *down* the Fourier coefficients (taking advantiage of + * the Hermetian property) */ + if (idx_hi > total_freq_per_chan) { + for (size_t i = + first_freq_this_chan + idx_hi - 1; + i >= first_freq_this_chan + + total_freq_per_chan; + --i) { + fftwf_complex c = { + po[num_samples - i][0], + -po[num_samples-i][1] }; + pout = op(c, pout); + } + } + } + return pout; + } + +}; + +} // namespace Visualization + +#endif // SOUND_ANALYSIS_HXX_INCLUDED diff --git a/src/output/plugins/visualization/SoundInfoCache.cxx b/src/output/plugins/visualization/SoundInfoCache.cxx new file mode 100644 index 0000000000..e7625bcae3 --- /dev/null +++ b/src/output/plugins/visualization/SoundInfoCache.cxx @@ -0,0 +1,244 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +// Copyright The Music Player Daemon Project + +#include "SoundInfoCache.hxx" + +#include "Log.hxx" +#include "lib/fmt/ThreadIdFormatter.hxx" +#include "util/Domain.hxx" + +#include +#include + +using namespace Visualization; +using namespace std::chrono; + +const Domain d_sound_info_cache("vis_sound_info_cache"); + +inline +typename std::chrono::microseconds::rep +NowTicks() { + return duration_cast( + std::chrono::system_clock::now().time_since_epoch()).count(); +} + +Visualization::SoundInfoCache::SoundInfoCache(const AudioFormat &audio_format, + const Duration &buf_span): + fmt(audio_format), + secs_per_frame(1. 
/ double(fmt.GetSampleRate())), + frame_size(audio_format.GetFrameSize()), + ring(fmt.TimeToSize(buf_span)), + cb(0), + p0(0), + p1(0) +{ } + +/** + * \brief Add \a size bytes of PCM data to the cache; \a data is assumed to be + * PCM data in our audio format + * + * + * \param data [in] Address of a buffer containing PCM samples to be added to the cache + * + * \param size [in] Size of \a data, in bytes + * + * + * This method will add \a data to the end of the cache, overwriting earlier + * data if necessary. + * + * Nb. regarding the corner case where \a size is larger than the cache itself: + * in this event, the implementation will simply write as much of \a data into + * the cache as possible, discarding both the first portion of \a data as well + * as the previous contents of the cache. + * + * + */ + +void +Visualization::SoundInfoCache::Add(const void *data, size_t size) +{ + FmtDebug(d_sound_info_cache, "[{}] SoundInfoCache::add(tid:{}," + "bytes:{})", NowTicks(), std::this_thread::get_id(), size); + + std::lock_guard guard(mutex); + + if (t0.time_since_epoch().count() == 0) { + t0 = system_clock::now(); + t1 = t0; + } + + size_t cb_ring = ring.size(); + if (size > cb_ring) { + /* Special case: we can't fit this chunk into the ring buffer; + just take the last `cb_ring` bytes & discard everything + earlier. */ + size_t lost = size - cb_ring; + memcpy(ring.data(), (const uint8_t*)data + lost, cb_ring); + cb = cb_ring; + p0 = p1 = 0; + t1 += fmt.SizeToTime(size); + t0 = t1 - fmt.SizeToTime(cb_ring); + } else { + /* Happy path: `size` is <= `cb_ring`. We can fit it all, but + may overwrite old data. */ + size_t part1 = + std::min(size, cb_ring - p1); // #bytes written at p1 + size_t part2 = size - part1; // #bytes "wrapped around" + + memcpy(ring.data() + p1, data, part1); + memcpy(ring.data(), (const uint8_t*)data + part1, part2); + + p1 = (p1 + size) % cb_ring; + + // # bytes overwritten at start/p0 + size_t part3; + if (cb == cb_ring) { + part3 = size; + } else { + part3 = part2 > (size_t) p0 ? part2 - p0 : 0; + } + + p0 = (p0 + part3) % cb_ring; + cb = cb + size - part3; + + t0 += fmt.SizeToTime(part3); + t1 += fmt.SizeToTime(size); + } +} + +// This is primarily used for testing purposes. +bool +Visualization::SoundInfoCache::GetFromBeginning(size_t nsamp, + void *buf, + size_t cbbuf) const +{ + std::lock_guard guard(mutex); + + size_t cbsamp = nsamp * frame_size; + if (cbsamp > cbbuf) { + return false; + } + + size_t part1 = std::min(cbsamp, ring.size() - p0); + size_t part2 = cbsamp - part1; + memcpy(buf, ring.data() + p0, part1); + memcpy((uint8_t*)buf + part1, ring.data(), part2); + + return true; +} + +/** + * \brief Retrieve \a nsamp PCM samples ending at time \a t; copy them into + * \a buf; will return false if this cannot be done for any reason + * + * + * \param nsamp [in] the number of PCM samples desired by the caller; this + * corresponds to an AudioFormat "frame": IOW each sample is made up of multiple + * channels of PCM data + * + * \param t [in] the time at which the sampling shall \e end + * + * \param buf [in] a caller-supplied buffer to which, on success, \a nsamp + * audio frames will be copied + * + * \param cbbuf [in] the size, in bytes, of the buffer at \a buf + * + * \return true on success, false on failure + * + * + * This method will copy \a nsamp audio samples ending at time \a t into + * \a buf. If \a t does not exactly correspond to an audio sample, it will be + * adjusted by the implementation to correspond to the next whole sample. 
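Before the implementation: a standalone sketch of the wrap-around read GetByTime() performs once it has located the starting byte offset; this mirrors the part1/part2 split in the code below (names illustrative):

	#include <algorithm>
	#include <cstddef>
	#include <cstdint>
	#include <cstring>

	void
	RingRead(const uint8_t *ring, std::size_t ring_size, std::size_t pa,
		 std::size_t count, uint8_t *out)
	{
		// bytes available from pa to the physical end of the buffer
		const std::size_t part1 = std::min(count, ring_size - pa);
		// bytes that wrap around to the front
		const std::size_t part2 = count - part1;
		std::memcpy(out, ring + pa, part1);
		std::memcpy(out + part1, ring, part2);
	}
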
+ *
+ *
+ */
+
+bool
+Visualization::SoundInfoCache::GetByTime(size_t nsamp, Time t,
+					 void *buf, size_t cbbuf) const
+{
+	using std::min;
+
+	FmtDebug(d_sound_info_cache, "[{}] SoundInfoCache::get_by_time"
+		 "(tid:{},t:{}us, delta:{}us)", NowTicks(), std::this_thread::get_id(),
+		 duration_cast<microseconds>(t.time_since_epoch()).count(),
+		 duration_cast<microseconds>(t1 - t).count());
+
+	std::lock_guard guard(mutex);
+
+	size_t cbsamp = nsamp * frame_size;
+	if (cbsamp > cbbuf) {
+		/* Can't fit the requested number of frames/samples into `buf`--
+		   fail. */
+		FmtWarning(d_sound_info_cache,
+			   "[{}] SoundInfoCache::get_by_time: can't fit {} "
+			   "samples into {} bytes", NowTicks(), nsamp, cbbuf);
+		return false;
+	}
+
+	if (t > t1) {
+		FmtWarning(d_sound_info_cache,
+			   "[{}] SoundInfoCache::get_by_time: time t {}us is "
+			   "greater than time t1 {}us-- failing.",
+			   NowTicks(),
+			   duration_cast<microseconds>(t.time_since_epoch()).count(),
+			   duration_cast<microseconds>(t1.time_since_epoch()).count());
+		return false;
+	}
+
+	/* Determine which frame `t` falls into. If `t - t0` is a perfect
+	   multiple of the time-per-frame, use the last frame.
+
+	   I need the duration `t - t0` in seconds, but with the fractional
+	   part preserved. */
+	double delta_t = double(duration_cast<microseconds>(t - t0).count()) / 1000000.;
+	ptrdiff_t pb =
+		p0 + ptrdiff_t(ceil(delta_t / secs_per_frame)) * frame_size;
+
+	// Make sure we have enough samples in [t0, t) to satisfy this request.
+	size_t cb_in_buf = size_t(ceil(delta_t / secs_per_frame)) * frame_size;
+	if (cbsamp > cb_in_buf) {
+		FmtWarning(d_sound_info_cache,
+			   "[{}] SoundInfoCache::get_by_time: the requested "
+			   "number of samples take up {} bytes, but we only "
+			   "have {} bytes in the buffer.",
+			   NowTicks(), cbsamp, cb_in_buf);
+		return false;
+	}
+
+	size_t cb_ring = ring.size();
+	ptrdiff_t pa = pb - nsamp * frame_size;
+	pb = pb % cb_ring;
+	pa = pa % cb_ring;
+
+	/* So we want to copy offsets [pa, pb) % cb_ring :=> buf. "part1"
+	   denotes the range from `pa` to the end of the buffer, and "part2"
+	   that from the start of the buffer to `pb`. */
+	size_t part1 = min(cbsamp, cb_ring - pa);
+	size_t part2 = cbsamp - part1;
+	memcpy(buf, ring.data() + pa, part1);
+	memcpy((uint8_t*)buf + part1, ring.data(), part2);
+
+	return true;
+}
+
+/// Return true IFF the ring buffer is empty
+bool
+Visualization::SoundInfoCache::Empty() const {
+	std::lock_guard guard(mutex);
+	return 0 == cb;
+}
+
+std::pair<SoundInfoCache::Time, SoundInfoCache::Time>
+Visualization::SoundInfoCache::Range() const
+{
+	std::lock_guard guard(mutex);
+	return std::make_pair(t0, t1);
+}
+
+std::size_t
+Visualization::SoundInfoCache::Size() const
+{
+	std::lock_guard guard(mutex);
+	return cb;
+}
diff --git a/src/output/plugins/visualization/SoundInfoCache.hxx b/src/output/plugins/visualization/SoundInfoCache.hxx
new file mode 100644
index 0000000000..9d25964116
--- /dev/null
+++ b/src/output/plugins/visualization/SoundInfoCache.hxx
@@ -0,0 +1,104 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+// Copyright The Music Player Daemon Project
+
+#ifndef SOUND_INFO_CACHE_HXX_INCLUDED
+#define SOUND_INFO_CACHE_HXX_INCLUDED
+
+#include "output/Timer.hxx"
+#include "pcm/AudioFormat.hxx"
+#include "thread/Mutex.hxx"
+#include "util/AllocatedArray.hxx"
+
+#include <chrono>
+#include <utility>
+
+namespace Visualization {
+
+/**
+ * \brief Thread-safe cache for recent PCM data
+ *
+ *
+ * Class SoundInfoCache maintains a ring buffer (AKA circular buffer) for PCM
+ * data to cap the amount of memory used.
It keeps two pointers into that + * buffer: the beginning and the end of valid data, along with the timestamps + * corresponding to each. + * + * The general contract is that once the ctor returns, the caller has an + * instance with an empty ring buffer & that is ready to accept data. Time + * starts from the first invocation of add(). Successive invocations of add() + * are assumed to represent contiguous ranges of sound data (i.e. there is no + * way to represent gaps). + * + * Instances may have their methods invoked by multiple threads, so any method + * invocation will block on acquiring a Mutex. I had initially considered a + * single-writer, multi-reader lock in the interests of allowing many + * simultaneous reads, but in practice it would not be an improvement, since + * there is only one reader & one writer, and the writer, empirically, is the + * more frequent caller. + * + * A circular buffer is surprisingly difficult to write. I considered + * abstracting this implementation into a general purpose library class, but + * there are a number of implementation-specific choices arguing against that: + * + * - using a flag versus wasting a slot to distinguish full from empty + * - overwrite versus drop when new data won't fit + * - copy in bulk (via `mempcy()`) versus copying slot-by-slot + * + * In the end I decided to just write an application-specific implementation. + * + * + */ + +class SoundInfoCache { +public: + typedef std::chrono::system_clock::duration Duration; + typedef std::chrono::time_point Time; + +private: + AudioFormat fmt; + /// Time per frame, in seconds + double secs_per_frame; + /// Sample size, in bytes + unsigned frame_size; + /* Mutex guarding the ring buffer since instances will be accessed from + multiple threads */ + mutable Mutex mutex; + /// this is the ring buffer + AllocatedArray ring; + /// # of bytes currently in the ring buffer (as distinct from capacity) + std::size_t cb; + /// Valid PCM data exists in buf[p0, p1) + size_t p0, p1; + /// Time t0 corresponds to p0, t1 to p1 + Time t0, t1; + +public: + /* Create a cache storing \a buf_span time's worth PCM data in format + \a audio_format */ + SoundInfoCache(const AudioFormat &audio_format, + const Duration &buf_span); + +public: + /* Add \a size bytes of PCM data to the cache; \a data is assumed to be + PCM data in our audio format */ + void Add(const void *data, size_t size); + AudioFormat GetFormat() const noexcept { + return fmt; + } + /* Read \a nsamp audio samples from the \e beginning of the buffer; will + return false if \a buf is not large enough to accomodate that */ + bool GetFromBeginning(size_t nsamp, void *buf, size_t cbbuf) const; + /* Retrieve \a nsamp PCM samples ending at time \a t; copy them into \a + buf; will return false if this cannot be done for any reason */ + bool GetByTime(size_t nsamp, Time t, void *buf, size_t cbbuf) const; + /// Return true IFF the ring buffer is empty + bool Empty() const; + /// Retrieve the time range for which this cache has data + std::pair Range() const; + /// Return the # of bytes in the buffer (as opposed to buffer capacity) + std::size_t Size() const; +}; + +} // namespace Visualization + +#endif // SOUND_INFO_CACHE_HXX_INCLUDED diff --git a/src/output/plugins/visualization/VisualizationClient.cxx b/src/output/plugins/visualization/VisualizationClient.cxx new file mode 100644 index 0000000000..0f18883e47 --- /dev/null +++ b/src/output/plugins/visualization/VisualizationClient.cxx @@ -0,0 +1,534 @@ +// SPDX-License-Identifier: GPL-2.0-or-later 
+// Copyright The Music Player Daemon Project + +#include "VisualizationClient.hxx" + +#include "Log.hxx" +#include "event/Chrono.hxx" +#include "lib/fmt/ThreadIdFormatter.hxx" +#include "util/Domain.hxx" + +#include +#include +#include + +const Domain d_vis_client("vis_client"); + +inline +typename std::chrono::microseconds::rep +NowTicks() { + return duration_cast( + std::chrono::system_clock::now().time_since_epoch()).count(); +} + + +// Invoked when the client connects and the plugin is in the "closed" state. +Visualization::VisualizationClient::VisualizationClient( + UniqueSocketDescriptor fd, + EventLoop &event_loop, + const SoundAnalysisParameters ¶ms): + BufferedSocket(fd.Release(), event_loop), // schedules a read + sound_params(params), + num_samp(params.GetNumSamples()), + protocol_state(ProtocolState::Init), + timer(event_loop, BIND_THIS_METHOD(OnTimer)) +{ } + +// Invoked when the client connects and the plugin is in the "opened" state. +Visualization::VisualizationClient::VisualizationClient( + UniqueSocketDescriptor fd, + EventLoop &event_loop, + const SoundAnalysisParameters ¶ms, + const std::shared_ptr &pcache): + BufferedSocket(fd.Release(), event_loop), // schedules a read + sound_params(params), + num_samp(params.GetNumSamples()), + pcm_state(HavePcmData { + pcache, Visualization::SoundAnalysis(params, pcache) }), + protocol_state(ProtocolState::Init), + timer(event_loop, BIND_THIS_METHOD(OnTimer)) +{ } + +void +Visualization::VisualizationClient::OnPluginOpened( + const std::shared_ptr &pcache) +{ + FmtDebug(d_vis_client, "[{}] VisualizationClient::OnPluginOpened(" + "this:{},tid:{},state:{})", NowTicks(), (size_t)this, + std::this_thread::get_id(), (int)protocol_state); + + pcm_state = HavePcmData { + pcache, Visualization::SoundAnalysis(sound_params, pcache) + }; + + HandleFirstFrame(); +} + +void +Visualization::VisualizationClient::OnPluginClosed() +{ + FmtDebug(d_vis_client, "[{}] VisualizationClient::OnPluginClosed(" + "this:{},tid:{},state:{})", NowTicks(), (size_t)this, + std::this_thread::get_id(), (int)protocol_state); + + if (IsClosed()) { + Shutdown(); + return; + } + + // Update `pcm_state`... + pcm_state = std::monostate{}; + /* but no need to do anything else. We'll detect the fact that the + plugin is closed during subsequent state transitions & handle it + there. */ +} + +Visualization::VisualizationClient::~VisualizationClient() { + FmtDebug(d_vis_client, "[{}] VisualizationClient::~VisualizationClient()" + "this:{},tid:{},state:{})", NowTicks(), (size_t)this, + std::this_thread::get_id(), (int)protocol_state); + // This will be invoked on the main thread; the socket & underlying + // `SocketEvent` will be torn-down on the I/O thread. + timer.Cancel(); +} + +BufferedSocket::InputResult +Visualization::VisualizationClient::OnSocketInput(void *data, + size_t length) noexcept +{ + FmtDebug(d_vis_client, "[{}] VisualizationClient::OnSocketInput(" + "this:{},tid:{},state:{},length:{})", NowTicks(), + (size_t)this, std::this_thread::get_id(), + (int)protocol_state, length); + + // We have data available to be read, and it's present in `data`... + if (ProtocolState::Init != protocol_state) { + Shutdown(); + return InputResult::CLOSED; + } + + // attempt to parse it as a CLIHLO message... 
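	/* (Editorial note, inferred from this handler rather than from
	 * BufferedSocket's own documentation: InputResult::MORE asks the
	 * base class to keep the buffered bytes and call back when more
	 * arrive; PAUSE suspends read callbacks after ConsumeInput() has
	 * eaten the message, until reads are re-scheduled; CLOSED reports
	 * that the socket has been shut down.) */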
+ ClientHello clihlo; + ParseResult parse_result = ParseClihlo(data, length, clihlo); + if (ParseResult::NEED_MORE_DATA == parse_result) { + return InputResult::MORE; + } else if (ParseResult::ERROR == parse_result) { + LogError(d_vis_client, + "Expected CLIHLO, received invalid message."); + Shutdown(); + return InputResult::CLOSED; + } + + FmtDebug(d_vis_client, "[{}] Got CLIHLO: {}fps, tau={}ms", NowTicks(), + clihlo.requested_fps, clihlo.tau); + + if (0 != clihlo.major_version || 1 != clihlo.minor_version) { + FmtWarning(d_vis_client, "Unexpected protocol version {}.{} " + "requested-- proceeding to serve 0.1.", + clihlo.major_version, clihlo.minor_version); + } + + if (0 == clihlo.requested_fps) { + LogError(d_vis_client, + "Client requested 0fps-- closing connection."); + Shutdown(); + return InputResult::CLOSED; + } + + // OK-- we have timings: + timings = HaveClientInfo { + std::chrono::milliseconds(clihlo.tau), + std::chrono::milliseconds(int(1000. / clihlo.requested_fps)) + }; + + // Seems legit-- compose our response... + ConsumeInput(length); + + next_frame.clear(); + SerializeSrvhlo((std::byte)0, (std::byte)1, back_inserter(next_frame)); + + FmtDebug(d_vis_client, "[{}] Composed a SRVHLO frame, cancelled read, " + "scheduled a write, and shifted to state {}.", NowTicks(), + (int)ProtocolState::SrvHlo); + + // shift state... + protocol_state = ProtocolState::SrvHlo; + // and schedule a write. + event.CancelRead(); + event.ScheduleWrite(); + return InputResult::PAUSE; +} + +void +Visualization::VisualizationClient::OnSocketError( + std::exception_ptr ep) noexcept { + LogError(ep); + Shutdown(); +} + +void +Visualization::VisualizationClient::OnSocketClosed() noexcept { + FmtInfo(d_vis_client, "[{}] VisualizationClient::OnSocketClosed(" + "this:{},tid:{})", NowTicks(), (size_t)this, std::this_thread::get_id()); + Shutdown(); +} + +void +Visualization::VisualizationClient::OnSocketReady(unsigned flags) noexcept +{ + FmtDebug(d_vis_client, "[{}] VisualizationClient::OnSocketReady(" + "this:{},tid:{},state:{},flags:{})", NowTicks(), (size_t)this, + std::this_thread::get_id(), (int)protocol_state, flags); + + switch (protocol_state) { + case ProtocolState::Init: { + + if (0 == (flags & SocketEvent::READ)) { + FmtError(d_vis_client, "In state Init, got flags {} (" + "which do not contain READ/POLLIN); in this " + "state we expect a CLIHLO message.", flags); + Shutdown(); + return; + } + + if (flags & (SocketEvent::ERROR|SocketEvent::HANGUP)) { + FmtError(d_vis_client, "In state Init, got flags {} " + "which contains ERROR and/or HANGUP, " + "shutting-down.", flags); + Shutdown(); + return; + } + + // Will invoke `OnSocketInput()` + BufferedSocket::OnSocketReady(flags); + break; + } + case ProtocolState::SrvHlo: + HandleSrvHlo(flags); + break; + + case ProtocolState::FrameReady: + HandleFrameReady(flags); + break; + + default: + FmtError(d_vis_client, "VisualizationClient::OnSocketReady(" + "tid: {}, flags: {}) invoked in state {}-- BAILING!", + std::this_thread::get_id(), flags, (int)protocol_state); + Shutdown(); + return; + } + +} + +/** + * \brief Update our sound analysis + * + * + * \return true if the analysis was successfully carried-out, false if it was + * not + * + * + * This method could fail to update the analysis for a few reasons: + * + * - the plugin could have been closed (in which case this implementation will + * shift to state ProtocolClosed) - the cache could not contain PCM data for + * the requested offset + * + * If this method returns true, the next FRAME is 
waiting in next_frame; the
+ * caller is responsible for scheduling a write.
+ *
+ *
+ */
+
+bool
+Visualization::VisualizationClient::ComposeSoundAnalysisFrame()
+{
+	using namespace std::chrono;
+
+	FmtDebug(d_vis_client, "[{}] VisualizationClient::"
+		 "ComposeSoundAnalysisFrame(this:{},tid:{},state:{})",
+		 NowTicks(), (size_t)this, std::this_thread::get_id(),
+		 (int)protocol_state);
+
+	if (!PluginIsOpen()) {
+		protocol_state = ProtocolState::ProtocolClosed;
+		return false;
+	}
+
+	auto now = system_clock::now();
+	HavePcmData &pcm_data = std::get<HavePcmData>(pcm_state);
+	if (!pcm_data.analysis.Update(now + timings->tau)) {
+		return false;
+	}
+
+	/* At this point, the data we wish to transport on the wire is residing
+	 * inside `pcm_data.analysis`. It needs to be transformed into its
+	 * interchange format (IEEE 754, big-endian, single precision), and
+	 * moved into a buffer laid-out according to the protocol. That's one
+	 * copy. I don't want to spend a lot of time optimizing this right now,
+	 * but I'd like to avoid a second one-- we'll ask the `SoundAnalysis` to
+	 * `transform()` the data with a unary operator & output iterator we
+	 * provide. */
+
+	SerializeSoundInfoFrame(pcm_data.analysis, back_inserter(next_frame));
+	return true;
+}
+
+/**
+ * \brief Handle the first frame-- if tau < 0 schedule the timer for -tau ms,
+ * else write a frame immediately
+ *
+ *
+ * If \c tau is less than zero, schedule a timer for -tau ms and shift state to
+ * Waiting.
+ *
+ * If \c tau is non-negative, attempt to carry-out a sound analysis.
+ *
+ * If that succeeds, schedule a write of the newly-populated frame buffer and
+ * shift to state FrameReady.
+ *
+ * If the analysis fails, cancel any writes, schedule the timer for \a freq ms,
+ * and shift to state Waiting.
+ *
+ *
+ */
+
+void
+Visualization::VisualizationClient::HandleFirstFrame()
+{
+	auto tau = timings->tau;
+	auto freq = timings->freq;
+	if (tau < std::chrono::milliseconds::zero()) {
+		FmtDebug(d_vis_client, "[{}] VisualizationClient::"
+			 "HandleFirstFrame([this:{}]) scheduling the first "
+			 "analysis for {} ms from now & transitioning to "
+			 "state {}.", NowTicks(), (size_t)this, -tau.count(),
+			 (int)ProtocolState::Waiting);
+		timer.Schedule(std::chrono::milliseconds(-tau));
+		protocol_state = ProtocolState::Waiting;
+	}
+	else {
+		if (ComposeSoundAnalysisFrame()) {
+			FmtDebug(d_vis_client, "[{}] VisualizationClient::"
+				 "HandleFirstFrame(this:{}) carried out sound "
+				 "analysis, scheduled a write & is shifting to "
+				 "state {}.", NowTicks(), (size_t)this,
+				 (int)ProtocolState::FrameReady);
+			event.ScheduleWrite();
+			timer.Schedule(std::chrono::milliseconds(freq));
+			protocol_state = ProtocolState::FrameReady;
+		} else {
+			FmtDebug(d_vis_client, "[{}] VisualizationClient::"
+				 "HandleFirstFrame(this:{}) failed to perform "
+				 "sound analysis; cancelling any outstanding "
+				 "writes, scheduling another attempt for {}ms "
+				 "from now & shifting to state {}.",
+				 NowTicks(), (size_t)this, freq.count(),
+				 (int)ProtocolState::Waiting);
+			event.CancelWrite();
+			timer.Schedule(std::chrono::milliseconds(freq));
+			protocol_state = ProtocolState::Waiting;
+		}
+	}
+}
+
+/**
+ * \brief Handle socket events when in state FrameReady
+ *
+ *
+ * \param flags Flags indicating the nature of the socket event that occasioned
+ * this call
+ *
+ *
+ * This function will handle errors, hangups, and writes. In the last case, it
+ * will attempt to write the contents of next_frame. If successful, it will
+ * shift state to Waiting.
+ *
+ *
+ */
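The tau-dependent rule HandleFirstFrame() above implements boils down to a few lines; a sketch under the assumption that a negative tau means the client wants analyses lagging song time (so the first analysis must be deferred until enough audio has accumulated):

	#include <chrono>

	// Negative tau: defer the first analysis by -tau ms; otherwise
	// analyze immediately and fall into the regular timer cadence of
	// one analysis every 1000/requested_fps ms.
	std::chrono::milliseconds
	FirstDelay(std::chrono::milliseconds tau)
	{
		using namespace std::chrono;
		return tau < milliseconds::zero() ? -tau : milliseconds::zero();
	}
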
+ *
+ *
+ */
+
+void
+Visualization::VisualizationClient::HandleFrameReady(unsigned flags)
+{
+	if (0 == (flags & SocketEvent::WRITE)) {
+		FmtError(d_vis_client, "In state FrameReady, got flags {} "
+			 "(which do not contain WRITE/POLLOUT); in this state "
+			 "we expect to be sending a sound analysis message.",
+			 flags);
+		Shutdown();
+		return;
+	}
+
+	if (flags & (SocketEvent::ERROR|SocketEvent::HANGUP)) {
+		FmtError(d_vis_client, "In state FrameReady, got flags {} which "
+			 "contains ERROR and/or HANGUP, shutting-down.",
+			 flags);
+		Shutdown();
+		return;
+	}
+
+	if (!WriteFrame()) {
+		return;
+	}
+
+	// Timer should already be active
+	protocol_state = ProtocolState::Waiting;
+}
+
+/**
+ * \brief Handle socket events while in state SrvHlo
+ *
+ *
+ * \param flags Flags indicating the nature of the socket event that occasioned
+ * this call
+ *
+ *
+ * This method expects the event to be a "write ready" and responds by writing
+ * the contents of next_frame (presumably an SRVHLO message). If successful, and
+ * the plugin is open, it will handle first frame chores. If the plugin is
+ * closed, it will shift to state ProtocolClosed.
+ *
+ *
+ */
+
+void
+Visualization::VisualizationClient::HandleSrvHlo(unsigned flags)
+{
+	if (0 == (flags & SocketEvent::WRITE)) {
+		FmtError(d_vis_client, "In state SrvHlo, got flags {} (which "
+			 "do not contain WRITE/POLLOUT); in this state we "
+			 "expect to be sending an SRVHLO message.", flags);
+		Shutdown();
+		return;
+	}
+
+	if (flags & (SocketEvent::ERROR|SocketEvent::HANGUP)) {
+		FmtError(d_vis_client, "In state SrvHlo, got flags {} which "
+			 "contains ERROR and/or HANGUP, shutting-down.",
+			 flags);
+		Shutdown();
+		return;
+	}
+
+	// The SRVHLO should be waiting for us in `next_frame`
+	if (!WriteFrame()) {
+		return;
+	}
+
+	if (PluginIsOpen()) {
+		HandleFirstFrame();
+	} else {
+		FmtDebug(d_vis_client, "[{}] VisualizationClient::"
+			 "HandleSrvHlo(): The visualization plugin is "
+			 "closed; shifting to state {}.",
+			 NowTicks(), (int)ProtocolState::ProtocolClosed);
+		protocol_state = ProtocolState::ProtocolClosed;
+		event.CancelWrite();
+	}
+}
+
+void
+Visualization::VisualizationClient::LogSocketWriteError(
+	const socket_error_t &err) const noexcept
+{
+	if (IsSocketErrorSendWouldBlock(err)) {
+		LogNotice(d_vis_client, "OnSocketReady invoked, but write "
+			  "would block(!)");
+		return;
+	} else if (!IsSocketErrorClosed(err)) {
+		SocketErrorMessage msg(err);
+		FmtWarning(d_vis_client, "Failed to write to client: {}",
+			   (const char *)msg);
+	}
+}
+
+/* Timer callback-- invoked when it's time to compose the next sound analysis
+ * frame. This will re-schedule the timer regardless of success or failure of
+ * the sound analysis.
*/
+void
+Visualization::VisualizationClient::OnTimer() noexcept
+{
+	FmtDebug(d_vis_client, "[{}] VisualizationClient::OnTimer(this:{},"
+		 "tid:{},state:{})", NowTicks(), (size_t)this,
+		 std::this_thread::get_id(), (int)protocol_state);
+
+	if (ComposeSoundAnalysisFrame()) {
+		FmtDebug(d_vis_client, "VisualizationClient::OnTimer() "
+			 "carried-out sound analysis, scheduled a write, "
+			 "and shifted to state {}.",
+			 (int)ProtocolState::FrameReady);
+		event.ScheduleWrite();
+		protocol_state = ProtocolState::FrameReady;
+	} else {
+		// Give up for now-- wait for the next timer event
+		FmtDebug(d_vis_client, "VisualizationClient::OnTimer() "
+			 "failed to carry-out sound analysis; cancelling "
+			 "outstanding writes, shifting to state {}.",
+			 (int)ProtocolState::Waiting);
+		event.CancelWrite();
+		protocol_state = ProtocolState::Waiting;
+	}
+
+	timer.Schedule(timings->freq);
+}
+
+void
+Visualization::VisualizationClient::Shutdown() noexcept
+{
+	timer.Cancel();
+	event.CancelRead();
+	event.CancelWrite();
+	BufferedSocket::Close();
+	pcm_state = std::monostate{};
+	protocol_state = ProtocolState::Done;
+}
+
+bool
+Visualization::VisualizationClient::WriteFrame()
+{
+	ssize_t cb_written = GetSocket().Write({next_frame.begin(),
+						next_frame.end()});
+	if (0 > cb_written) {
+		LogSocketWriteError(GetSocketError());
+		Shutdown();
+		return false;
+	}
+
+	ssize_t cb_expected = next_frame.end() - next_frame.begin();
+
+	/* Handle the case of a partial write. The SRVHLO frame is always seven
+	   octets in size. */
+	if (cb_written < cb_expected) {
+		FmtWarning(d_vis_client, "VisualizationClient::WriteFrame() "
+			   "wrote {} bytes of message-- expected {}.",
+			   cb_written, cb_expected);
+		/* It's no problem, just remove the bytes that have been written
+		 * from `next_frame`, schedule another write & bail. */
+		next_frame.erase(next_frame.begin(),
+				 next_frame.begin() + cb_written);
+		event.ScheduleWrite();
+		return false;
+	}
+
+	/* Finally, we should handle the case of `cb_written > cb_expected`.
+	 * Naturally, that "should" never happen, but I just can't leave the
+	 * case uncovered. One could argue that an assertion would be
+	 * justified, but I understand the maintainers to frown on assertions
+	 * in production code, so: */
+	if (cb_written > cb_expected) {
+		FmtError(d_vis_client, "VisualizationClient::WriteFrame() "
+			 "wrote {} bytes when only {} were queued to be "
+			 "written-out. 
This should be investigated.", + cb_written, cb_expected); + } + + FmtDebug(d_vis_client, "[{}] VisualizationClient::WriteFrame(tid:{}," + "state:{}) wrote {} bytes (of {}); cancelling any outstanding " + "writes & clearing the frame buffer.", NowTicks(), std::this_thread::get_id(), + (int)protocol_state, cb_written, cb_expected); + + event.CancelWrite(); + next_frame.clear(); + + return true; +} diff --git a/src/output/plugins/visualization/VisualizationClient.hxx b/src/output/plugins/visualization/VisualizationClient.hxx new file mode 100644 index 0000000000..1d4945b3ce --- /dev/null +++ b/src/output/plugins/visualization/VisualizationClient.hxx @@ -0,0 +1,273 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +// Copyright The Music Player Daemon Project + +#ifndef VISUALIZATION_CLIENT_HXX_INCLUDED +#define VISUALIZATION_CLIENT_HXX_INCLUDED 1 + +#include "SoundAnalysis.hxx" +#include "Protocol.hxx" + +#include "event/BufferedSocket.hxx" +#include "event/FineTimerEvent.hxx" +#include "net/SocketError.hxx" +#include "net/UniqueSocketDescriptor.hxx" + +#include +#include +#include + +namespace Visualization { + +class SoundInfoCache; + +/** + * \class VisualizationClient + * + * \brief Represents a TCP connection to one visualization client + * + * + * This class implements the server side of the MPD visualization protocol, + * version 1, for a single client. + * + * The \ref vis_out_plugin_protocol_proto "protocol" suggests a finite state + * machine (FSM): + * + * events: + * + * - read ready + * - write complete + * - timer fired + * - plugin opened + * - plugin closed + * + * actions + * + * - schedule write + * - schedule read + * - cancel write + * - cancel read + * - schedule timer(interval) + * + \code + +------+ + +---->| Init | (read scheduled) + read ready, | +------+ + need more | | | + +------+ | read ready, recv CLIHLO, + | schedule a write + | + v + +--------+ + +--> | SRVHLO |-----------------------------+ + write complete,| +--------+ | + more to write | | | | | write complete, + +-----+ | | | plugin closed + | +----+ | (cancel write) + | | write complete, | + +----+ | plugin open, | + write complete, | | tau < 0 | + plugin open, | | (cancel write) | + tau >= 0 | | (schedule timer(-tau)) | + (update analysis) | | or | + (schedule write) | | failed analysis v + (schedule timer(freq)) | | (cancel write) +--------+ + | | (schedule timer(freq)) | Closed | + v v +--------+ + +------------+ +---------+ + +---->| FrameReady | | Waiting |<----+ + | +------------+ +---------+ | + | | ^ | ^ | | | + +------+ | | | | | | + write complete, | +---------------+ | +-------+ + more to write | write complete | no sound analysis + | (cancel write) | (schedule timer(freq)) + | | + +----------------------+ + timer fired + (schedule timer(freq)) + (schedule write) + + \endcode + * + * This is complicated by the fact that the output plugin that owns us may, at + * any given point in time, be "open" or "closed"; it is only when open that we + * know the format of the PCM data being played, and hence that this client may + * have a reference to the PCM data cache along with a `SoundAnalysis` instance + * necessary for performing sound analsysis. + * + * 1. instances start life waiting for the CLIHLO message (state :=> Init) + * + * 2. on read ready (state must be Init): + * + * 1) complete the read + * + * 2) compose the SRVHLO message + * + * 3) schedule a write + * + * 4) state :=> SrvHlo + * + * 3. 
on write ready (state must be SrvHlo)
+ *
+ *    1) write the current frame
+ *
+ *    2) branch:
+ *
+ *      - if the plugin is closed, state :=> Closed
+ *      - if the client needs the first frame & the plugin is open
+ *        + compose the frame
+ *        + schedule a write
+ *        + state :=> FrameReady
+ *      - else
+ *        + schedule the timer for -tau ms
+ *        + state :=> Waiting
+ *
+ * 4. on write ready (state must be FrameReady)
+ *
+ *    1) write the current frame
+ *
+ *    2) schedule the timer for 1/fps ms
+ *
+ *    3) state :=> Waiting
+ *
+ * 5. on timer firing (state must be Waiting)
+ *
+ *    1) if the plugin is open:
+ *
+ *      - compose the next frame
+ *      - schedule a write
+ *      - state :=> FrameReady
+ *
+ *
+ */
+
+class VisualizationClient : BufferedSocket {
+
+	Visualization::SoundAnalysisParameters sound_params;
+	size_t num_samp;
+
+	/// Data available to us when the visualization output plugin is open
+	struct HavePcmData {
+		// I wish C++ had a `not_null` class
+		std::shared_ptr<SoundInfoCache> pcache;
+		Visualization::SoundAnalysis analysis;
+	};
+	/// Plugin open/closed state-- cf. PluginIsOpen()
+	std::variant<std::monostate, HavePcmData> pcm_state;
+
+	/// The protocol can be represented as an FSM
+	enum class ProtocolState {
+		/* FSM initial state; the socket has been established, but no
+		 * communication has taken place; we are expecting a CLIHLO
+		 * message to arrive (i.e. a READ/POLLIN notification) */
+		Init,
+		/* CLIHLO has arrived, we've composed the SRVHLO and are waiting
+		 * for the socket to become available for write */
+		SrvHlo,
+		/* The handshake has been completed, but the plugin is currently
+		 * closed, so we can't perform sound analysis */
+		ProtocolClosed,
+		/// Handshake complete, waiting for the timer to fire
+		Waiting,
+		/* Handshake complete, frame composed, waiting for the socket to
+		 * become available for write */
+		FrameReady,
+		/// The socket has been closed and this instance may be reaped
+		Done,
+	} protocol_state;
+
+	/// Information available to us once we've parsed the CLIHLO message
+	struct HaveClientInfo {
+		std::chrono::milliseconds tau;
+		std::chrono::milliseconds freq; // 1/fps
+	};
+	/* A tuple whose first member is the offset from song time at which this
+	 * client has requested sound analysis, and the second is the interval
+	 * at which frames shall be sent (1/fps)-- both are only available to us
+	 * after the CLIHLO message has been parsed and we are in state SrvHlo
+	 * or later. */
+	std::optional<HaveClientInfo> timings;
+	/// Timer governing frame transmission
+	FineTimerEvent timer;
+	/* Next frame to be transmitted (if any) in serialized format
+	 * (i.e. 
ready to be written directly); empty `vector` denotes no such + * frame */ + std::vector next_frame; + +public: + /* Constructor invoked when a new client connects & the plugin is + closed */ + VisualizationClient( + UniqueSocketDescriptor fd, EventLoop &event_loop, + const Visualization::SoundAnalysisParameters ¶ms); + /// Constructor invoked when a new client connects & the plugin is open + VisualizationClient( + UniqueSocketDescriptor fd, EventLoop &event_loop, + const Visualization::SoundAnalysisParameters ¶ms, + const std::shared_ptr &pcache); + virtual ~VisualizationClient(); + + /// Invoked by the server when the plugin is opened + void OnPluginOpened( + const std::shared_ptr &pcache); + /// Invoked by the server when the plugin is closed + void OnPluginClosed(); + bool + IsClosed() const noexcept { + return ProtocolState::Done == protocol_state; + } + +protected: + + ///////////////////////////////////////////////////////////////////////// + // BufferedSocket interface // + ///////////////////////////////////////////////////////////////////////// + + virtual BufferedSocket::InputResult + OnSocketInput(void *data, size_t length) noexcept override; + virtual void OnSocketError(std::exception_ptr ep) noexcept override; + virtual void OnSocketClosed() noexcept override; + + /** + * Invoked when an event has occurred on this socket. \a flags + * will be a bitmask made of members of the EPollEvents enumeration. + * For reference: + * + * - READ = EPOLLIN = 1 + * - WRITE = EPOLLOUT = 4 + * - ERROR = EPOLLERR = 8 + * - HANGUP = EPOLLHUP = 16 + * + */ + virtual void OnSocketReady(unsigned flags) noexcept override; + +private: + + /// Update our sound analysis + bool ComposeSoundAnalysisFrame(); + /* Handle the first frame-- if tau < 0 schedule the timer for -tau ms, + * else write a frame immediately */ + void HandleFirstFrame(); + /// Handle a socket event while in state FrameReady + void HandleFrameReady(unsigned flags); + /// Handle a socket event while in state SrvHlo + void HandleSrvHlo(unsigned flags); + /// Utility function-- log a socket_error_t after an attempted write + void LogSocketWriteError(const socket_error_t &err) const noexcept; + /* Timer callback-- invoked when it's time to compose the next sound + * analysis frame */ + void OnTimer() noexcept; + bool + PluginIsOpen() const { + return 0 != pcm_state.index(); + } + /* Close our underlying socket, drop our shared cache & shift state to + * Done */ + void Shutdown() noexcept; + bool WriteFrame(); + +}; + +} // namespace Visualization + +#endif // VISUALIZATION_CLIENT_HXX_INCLUDED diff --git a/src/output/plugins/visualization/VisualizationOutputPlugin.cxx b/src/output/plugins/visualization/VisualizationOutputPlugin.cxx new file mode 100644 index 0000000000..5db094cc9c --- /dev/null +++ b/src/output/plugins/visualization/VisualizationOutputPlugin.cxx @@ -0,0 +1,712 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +// Copyright The Music Player Daemon Project + +#include "VisualizationOutputPlugin.hxx" +#include "SoundAnalysis.hxx" +#include "SoundInfoCache.hxx" +#include "VisualizationServer.hxx" + +#include "Log.hxx" +#include "config/Block.hxx" +#include "event/Call.hxx" +#include "lib/fmt/ThreadIdFormatter.hxx" +#include "output/Interface.hxx" +#include "output/OutputPlugin.hxx" +#include "util/Domain.hxx" + +#include + +namespace Visualization { + +/** + * \page vis_out_protocol Visualization Network Protocol + * + * See \ref vis_out "RFC: Visualization Output Plugin" for background. 
+ * + * + * \section vis_out_protocol_timing Timing + * + * In order to deliver sound data to the client at the proper time, the protocol + * needs to take into account: + * + * - network latency: the delta between writing the sound data to the socket & + * its receipt at the client + * + * - player buffering: the player may buffer sound data (mplayer, for instance, + * buffers half a second's worth of audio before beginning playback by + * default) + * + * - render time: the client presumably wishes the current frame to appear + * on-screen at the moment the current sound information is ending + * + * Throughout, let \e t be "song time" be measured on the server, and T(t) be + * sound information for song time \e t. Let FPS be the frames-per-second at + * which the client would like to render. + * + * Then, at an interval of 1/FPS seconds, the server needs to write + * + \verbatim + T(t - {buffer time} + {render time} + {one way latency}) + \endverbatim + * + * to the client socket. If we denote that time offset (i.e. the render time + + * one-way latency minus the buffer time) by tau, then the server should wait + * max(0, -tau) ms to write the first frame. + * + * A few examples will illustrate. + * + * \subsection vis_out_protocol_timing_eg_1 Example 1 + * + * Let the client render time be 4ms and round-trip network latency be + * 6ms. Assume no player buffering. In order to render a frame corresponding to + * song time \e t, the client would need, at time \e t - 4 ms, sound information + * corresponding to time \e t, or T(t). The server would need to \e send that + * information at time \e t - 7ms (half of one round-trip plus render time). + * + * In other words, on the server side at song time \e t, we would need to write + * T(t + 7ms) to the client socket. If the server writes T(t+7ms) immediately, + * the client will receive it at \e t + 4ms, take 4ms to render the next frame, + * and so at \e t + 7ms hence, finish rendering T(t+7). + * + * \subsection vis_out_protocol_timing_eg_2 Example 2 + * + * Imagine we are running the same client on a laptop, connected to an MPD + * server over the internet, and using mplayer as the player. This gives 500ms + * of buffer time. Let us assume the same 4ms render time, but now a 20ms + * round-trip time. + * + * In order to render a frame corresponding to song time \e t, the client would + * need, at time \e t - 4ms, T(t). This would need to be sent from the server at + * time \e t - 14ms. We now need to incorporate the client-side buffering, + * however. Song time \e t will be actually played on the client at \e t + 500ms + * on the server. + * + * In other words, on the server side at song time \e t, we would need to write + * T(t-486ms) to the client socket. + * + * Since the sound won't start on the client for 0.5 sec, it would make no sense + * to begin writing sound information for 486ms. Let t(0) be the moment the + * client connects and the player begins buffering. If, at t(0) + 486ms, the + * server writes T(t(0)), the client will receive it at t(0) + 496ms & complete + * rendering it at t(0) + 500ms, which is when the client-side player will + * begin playing song time t(0). + * + * \section vis_out_protocol_proto The Protocol + * + * \subsection vis_out_protocol_proto_design Design + * + * The author is unaware of any existing network protocols in this area, so he + * designed his own after reviewing the Shoutcast & Ultravox + * protocols. Experience with the TLS & 802.11 protocols also informed this + * design. 
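+ *
+ * (To restate the timing rule above operationally-- a sketch only, with
+ * illustrative names rather than the implementation's; the real scheduling
+ * lives in VisualizationClient:
+ *
+ \code
+
+   using namespace std::chrono;
+   // tau may well be negative (e.g. with heavy client-side buffering)
+   milliseconds tau = render_time + one_way_latency - buffer_time;
+   milliseconds first_delay = std::max(milliseconds::zero(), -tau);
+   // wait first_delay, then, every 1/FPS seconds, send T(song_time + tau)
+
+ \endcode
+ *
+ * With the numbers from Example 1, tau = 4ms + 3ms - 0 = 7ms, and the first
+ * frame may be written immediately; with Example 2, tau = 4ms + 10ms - 500ms
+ * = -486ms, and the first write waits 486ms.)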
+ *
+ * Design goals include:
+ *
+ * - client convenience
+ *   - this in particular drove the choice to stream updates; everything
+ *     needed to simply push the data out is knowable at handshake time,
+ *     so why force the client to send a request?
+ * - efficiency on the wire
+ *   - binary format
+ *   - streaming preferred over request/response
+ * - future extensibility
+ *   - protocol versioning built-in from the start
+ * - parsing convenience
+ *   - streaming messages come with a few "magic bytes" at the start
+ *     to assist clients in "locking on" to the stream & recovering from
+ *     corrupted data, client-side disruptions & so forth
+ *   - all messages conform to the "type-length-value" (TLV) format
+ *     beloved of parser writers
+ *
+ * Responses to the initial
+ * RFC also
+ * informed the protocol's first implementation: I've stripped out all but the
+ * essentials in pursuit of a minimally effective protocol that is still
+ * extensible should it prove useful.
+ *
+ *
+ * \subsection vis_out_protocol_proto_overview Overview
+ *
+ * The protocol is a combination of request/response as well as streaming. After
+ * an initial handshake (client goes first) the server will begin streaming
+ * messages to the client; i.e. at the interval the client specified during the
+ * initial handshake the server will send FRAME messages containing sound
+ * information useful for visualizers. The client need not request these
+ * messages, nor does the client need to acknowledge them in any way.
+ *
+ * Schematically, a conversation looks like this:
+ *
+ \verbatim
+   Client                                        Server
+
+   desired protocol version
+   tau (buffer offset)
+   frame rate             --------- CLIHLO --------->
+                                       ...
+
+                          <-------- SRVHLO ---------  offered protocol version
+
+                          <-------- FRAME  ---------  samples, spectrum
+                                                    | 1/fps sec
+                          <-------- FRAME  ---------  samples, spectrum
+                                       ...
+                                    (forever)
+ \endverbatim
+ *
+ * There is no formal "close" or "teardown" message; each side simply detects
+ * when the other has gone away & treats that as the end of the conversation.
+ *
+ *
+ * \subsection vis_out_protocol_proto_msgs Messages
+ *
+ * All messages:
+ *
+ * - integers use network byte order (i.e. big endian)
+ * - use TLV format (streaming messages prepend magic bytes)
+ *
+ \verbatim
+
+  +----------+-----------------------+-----------------+-----------------------+--------+
+  |(prefix)  | TYPE (16-bit unsigned)| LENGTH          | PAYLOAD               | CHECK  |
+  |          | class | message type  | 16-bits unsigned| LENGTH bytes          | 1 byte |
+  |----------|-------+---------------|-----------------|-----------------------+--------+
+  | 63ac4003 | 4 bits| 12 bits       | (max len 65535) | format is msg-specific| 00     |
+  +----------+-----------------------+-----------------+-----------------------+--------+
+
+ \endverbatim
+ *
+ * Notes:
+ *
+ * - the prefix is only prepended to FRAME messages to enable clients to "lock
+ *   on" to a pre-existing stream of data; 0x63ac4003 were the first four bytes
+ *   I pulled from \c /dev/urandom on my dev workstation on Monday, September 04.
+ *
+ * - the message type comprises two values packed into a u16_t:
+ *
+ *   - class: (type & 0xf000) >> 12:
+ *     - 00: handshake
+ *     - 01: streaming (FRAME, e.g.)
+ *
+ *   - message type: (type & 0x0fff) see below for values
+ *
+ * - the "length" field is the length of the \e payload \e only (i.e. \e not the
+ *   length of the entire message)
+ *
+ * - the "check" byte is intended as a sanity test & shall always be zero.
+ *   Although, what would the client do if the check failed?
There's no + * provision in this protocol to re-request the frame. Discard it, I suppose. + * + * The following subsections define the PAYLOAD portion of the above messages. + * + * \subsubsection vis_out_protocol_proto_clihlo CLIHLO + * + * No prefix. The class is 0x0 (handshake) & the message type is 0x000. + * + * Payload: + * + \verbatim + + +---------------+---------------+---------------+---------------+ + | major version | minor version | requested FPS | requested TAU | + | ------------- | ------------- |-------------- |---------------+ + | uint8_t | uint8_t | uint16_t | int16_t | + +---------------+---------------+---------------+---------------+ + + \endverbatim + * + * Payload size: 6 octets + * + * \subsubsection vis_out_protocol_proto_srvhlo SRVHLO + * + * No prefix. The class is 0x0 (handshake) & the message type is 0x001. + * + * Payload: + * + \verbatim + + +---------------+---------------+ + | major version | minor version | + | ------------- | ------------- | + | uint8_t | uint8_t | + +---------------+---------------+ + + \endverbatim + * + * \subsubsection vis_out_protocol_proto_frame FRAME + * + * Prefix. The class is 0x1 (streaming) & the message type is 0x000. + * + * Below, \c float denotes a floating-point value, expressed in IEEE 754 + * single-precision format, in big-endian byte order. \c complex denotes a pair + * of floating-point values (the real & imaginary components of a complex + * number, in that order) in the same format. + * + * Payload: + * + \code + + +----------+----------+-------------+-----------+----------+---------+---------+----------+------------+---------------+-----------------+ + | num_samp | num_chan | sample_rate | waveforms | num_freq | freq_lo | freq_hi | freq_off | coeffs | power_spectra | bass/mids/trebs | + | -------- | -------- | ----------- | --------- | -------- | ------- | ------- | -------- | ---------- | ------------- | --------------- | + | uint16_t | uint8_t | uint16_t | see below | uint16_t | float | float | uint16_t | see below | see below | see below | + +----------+----------+-------------+-----------+----------+---------+---------+----------+------------+---------------+-----------------+ + + waveforms: + + +----------------------+----------------------+-----+---------------------------------+ + | waveform for chan. 0 | waveform for chan. 1 | ... | waveform for chan. num_chan - 1 | + | -------------------- | -------------------- | ... | ------------------------------- | + | float | ... | float | float | ... | float | ... | float | ... | float | + | -------------------- | -------------------- | ... | ------------------------------- | + | (num_samp floats) | (num_samp floats) | ... | (num_samp floats) | + +----------------------+----------------------+-----+---------------------------------+ + + total: num_samp * num_chan * 4 octets + + coeffs: + + +--------------------------+--------------------------+-----+-------------------------------------+ + | freq. domain for chan. 0 | freq. domain for chan 1. | ... | freq. domain for chan. num_chan - 1 | + | ------------------------ + -------------------------+---- + ----------------------------------- | + | complex | ... | complex | complex | ... | complex | ... | complex | complex | ... | complex | + | ------------------------ +--------------------------+-----+-------------------------------------| + | num_freq complex | num_freq complex | ... 
| num_freq complex         |
+  +--------------------------+--------------------------+-----+-------------------------------------+
+
+  total: num_chan * num_freq * 8 octets
+
+  power spectra:
+
+  +-----------------------------+-----+---------------------------------------+
+  | power spectrum for chan. 0  | ... | power spectrum for chan. num_chan - 1 |
+  | --------------------------- +-----+ ------------------------------------- |
+  | float | float | ... | float | ... | float | float | ... | float          |
+  | --------------------------- + --- + ------------------------------------- |
+  | num_freq floats             | ... | num_freq floats                       |
+  +-----------------------------+-----+---------------------------------------+
+
+  total: num_chan * num_freq * 4 octets
+
+  bass/mids/trebs
+
+  +-----------------------------+-----+----------------------------------------+
+  | bass/mids/trebs for chan. 0 | ... | bass/mids/trebs for chan. num_chan - 1 |
+  | --------------------------- +-----+ -------------------------------------- |
+  | float | float | float       | ... | float | float | float                  |
+  +-----------------------------+-----+----------------------------------------+
+
+  total: num_chan * 12 octets
+
+  payload size: 17 + num_samp * num_chan * 4 + num_chan * num_freq * 8 + num_chan * num_freq * 4 + num_chan * 12
+              = 17 + 4 * num_chan * (num_samp + 3 * num_freq + 3)
+
+ \endcode
+ *
+ * - \c num_samp: the number of audio samples used in this analysis: this is set
+ *   in plugin configuration and in practice needn't be particularly large (512
+ *   is the default setting). This determines the number of values in
+ *   \c waveforms, and in part the number of values in \c frequencies and
+ *   \c power_spectra (see below)
+ *
+ * - \c num_chan: the number of audio channels used in this analysis: this is
+ *   determined by the audio stream being played at any given time, but 2
+ *   (i.e. stereo) is typical
+ *
+ * - \c sample_rate: the number of samples per second at which this audio stream
+ *   is encoded (44100 is typical)
+ *
+ * - \c waveforms: the PCM data on which this analysis was based; there will be
+ *   \c num_chan sets of num_samp floats (one for each channel, arranged one
+ *   after the other; i.e. not interleaved)
+ *
+ * - \c num_freq: the number of frequency values returned for each waveform in
+ *   this frame; this is a function of the sample rate, the number of audio
+ *   samples, and the frequency cutoffs with which the plugin was configured (on
+ *   which more below)
+ *
+ * - \c freq_lo, \c freq_hi: the frequency range returned; this is set in plugin
+ *   configuration. The range of human perception is roughly 20Hz to 20,000Hz,
+ *   but in practice musical sound data contains little information above 10-12K
+ *   Hz, so a typical setting for this range is 200Hz and 10000Hz.
+ *
+ * - \c freq_off: the index corresponding to \c freq_lo; this can be used by the
+ *   caller to map a Fourier coefficient to a frequency (see \c coeffs, below)
+ *
+ * - \c coeffs: the Fourier coefficients for each waveform, expressed as complex
+ *   numbers; the i-th value in this range is the \c freq_off + \c i -th Fourier
+ *   coefficient, corresponding to a frequency of
+ *
+ \code
+
+    (freq_off + i) * samp_rate
+    --------------------------- Hz
+            num_samp
+
+ \endcode
+ *
+ * The reason for this convention is that the plugin will _only_ return the
+ * Fourier coefficients within the range defined by \c freq_lo & \c freq_hi.
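+ *
+ * For instance-- a worked example under the typical values quoted above, not
+ * normative-- with \c num_samp = 512, \c sample_rate = 44100 and \c freq_lo =
+ * 200Hz, adjacent coefficients are 44100/512, or roughly 86.1Hz, apart. If the
+ * plugin takes the first bin at or above \c freq_lo (an assumption about the
+ * rounding convention), \c freq_off would be 3, and a client would map indices
+ * to frequencies like so:
+ *
+ \code
+
+   coeffs[0] --> (3 + 0) * 44100 / 512 = 258.4 Hz
+   coeffs[1] --> (3 + 1) * 44100 / 512 = 344.5 Hz
+   ...
+
+ \endcode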
+ *
+ * Note that Discrete Fourier Transforms of real-valued series (such as our PCM
+ * waveform) display the Hermitian property:
+ *
+ \code
+
+   C(i) = C(n-i)*
+
+ \endcode
+ *
+ * where '*' denotes complex conjugation. Many libraries take advantage of this
+ * to save space by only returning the first n/2 + 1 Fourier coefficients (since
+ * the remaining coefficients can be readily computed from those). The
+ * application of a frequency window spoils this nice symmetry.
+ *
+ * - \c power_spectra: the power spectrum for each channel; this is merely the
+ *   magnitude of the Fourier coefficient at each frequency. Strictly speaking
+ *   the client could compute this for themselves, but this is such a frequently
+ *   used value the plugin computes & transmits it as a convenience to the
+ *   caller. There are again \c num_freq values.
+ *
+ * - bass/mids/trebs: once the frequency domain is truncated to the given
+ *   bounds, the number of octaves therein is divided into three equal
+ *   bands and the power in each band is summed (this is done separately
+ *   for each channel)
+ *
+ * A number of these quantities won't change; they're defined in plugin
+ * configuration; \c num_samp, \c freq_lo & \c freq_hi could, in principle, be
+ * moved to the SRVHLO message.
+ *
+ * Furthermore, \c num_chan, \c sample_rate and hence \c num_freq are set at the
+ * start of each new audio stream, and so could be communicated once at that
+ * point & omitted from subsequent frames.
+ *
+ * That said, this would complicate client implementations for the sake of
+ * saving a few bytes on the wire; I've chosen to simply communicate this
+ * information in each frame.
+ *
+ *
+ */
+
+/**
+ * \page vis_out_arch Layout of the Visualization Output Plugin
+ *
+ * \section vis_out_arch_intro Introduction
+ *
+ * There are, at the time of this writing, two other output plugins that provide
+ * socket servers: HttpdOutput & SnapcastOutput. They both follow a similar
+ * pattern in which the plugin subclasses both AudioOutput \e and
+ * ServerSocket. Since I have chosen a different approach, I should both
+ * describe the layout of VisualizationOutput and explain my choice.
+ *
+ * \section vis_out_arch_cyclic Cyclic Dependencies
+ *
+ * While they subclass privately (implying an "implemented-in-terms-of" rather
+ * than "is-a" relationship with their superclasses), HttpdOutput &
+ * SnapcastOutput in practice handle the duties of being both an AudioOutput and
+ * a ServerSocket. This introduces not one but two cyclic dependencies in their
+ * implementations:
+ *
+ * 1. the ServerSocket half of them is responsible for creating new clients, but
+ * the clients are the ones who detect that their socket has been closed; they
+ * then need a back-reference to signal their parent that they should be
+ * destroyed (by calling RemoveClient() through their back-reference).
+ *
+ * 2. the AudioOutput half of them is responsible for pushing new data derived
+ * from PCM data out to all their clients, while their clients request
+ * information & service from their parent, again requiring a back reference
+ * (GetCodecName() on the Snapcast client, e.g.)
+ *
+ * Cyclic dependencies carry with them drawbacks:
+ *
+ * - they increase compilation times because when one file in the cycle is
+ *   changed, all the other translation units need to be recompiled
+ *
+ * - they increase coupling, increasing the chances that a change in
+ *   one place will break others
+ *
+ * - code reuse becomes more difficult-- trying to hoist one file out involves
+ *   bringing all the other files in the cycle along with it
+ *
+ * - unit testing becomes harder-- the smallest unit of testable
+ *   functionality becomes the union of all the translation units in the
+ *   cycle
+ *
+ * \section vis_out_arch_threads Too Many Threads!
+ *
+ * This arrangement entails another problem: HttpdOutput & SnapcastOutput
+ * instances have their methods invoked on two threads; the main I/O thread as
+ * well as the player control thread. This means that access to some state needs
+ * to be guarded by a mutex (in the case of HttpdOutput, the client list & the
+ * pages), but \e not others (again in the case of HttpdOutput, content or
+ * genre).
+ *
+ * \section vis_out_arch_demotion Breaking Dependency Cycles Through Demotion
+ *
+ * I instead chose to have VisualizationOutput \e be an AudioOutput, and \e own
+ * a ServerSocket. The state & behavior required by both is pushed down into
+ * class SoundInfoCache on which both depend. This arrangement breaks things up
+ * in a few ways.
+ *
+ * Cycle 1 is broken up by having a one-way relationship only between the socket
+ * server & clients. When a client detects that its socket has been closed, it
+ * marks itself "dead" and will eventually be reaped by the server.
+ *
+ * Cycle 2 is broken by Lakos' method of "demotion": the functionality required
+ * by both the output plugin & the various clients is pushed down into a
+ * separate class SoundInfoCache. It is owned by the plugin, and referenced by
+ * clients. When the plugin is disabled, the plugin is responsible for
+ * cleaning-up the server, which will in turn clean-up all the clients, and only
+ * then destroying the SoundInfoCache instance.
+ *
+ * In ASCII art:
+ *
+ \verbatim
+      sound     +---------------------+               +---------------------+
+   -- data ---> | VisualizationOutput | --- owns ---> | VisualizationServer |
+                +---------------------+               +---------------------+
+                | Play()              |               | OnAccept()          |
+                +---------------------+               +---------------------+
+                       1 |                                   | 1
+                         |                 +-------owns------+
+                         |                 |
+                         |                 v *
+                         |       +---------------------+
+                       owns      | VisualizationClient |
+                         |       +---------------------+
+                         |                 | *
+                         |     +--references--+
+                         |     |
+                       1 v     v 1
+                     +----------------+
+                     | SoundInfoCache |
+                     +----------------+
+ \endverbatim
+ *
+ * This arrangement also addresses the threading issue: other than creation &
+ * destruction, the socket server has all of its methods invoked on the I/O
+ * thread, and those of the plugin on the player control thread. The state that
+ * needs to be guarded against access from multiple threads is localized in
+ * SoundInfoCache.
+ *
+ *
+ * \section vis_out_arch_promotion A Discarded Approach
+ *
+ * The \ref vis_out_back "idea" of having sound analysis accessible through the
+ * MPD client
+ * protocol
+ * to me raised the question: why not have SoundInfoCache be owned directly by
+ * MultipleOutputs? MPD clients could make requests directly via
+ *
+ \code
+   partition.outputs.sound_info_cache.analyze(...);
+ \endcode
+ *
+ * We could hand a reference to it to the visualization output plugin, and have
+ * the plugin be solely responsible for serving the network protocol.
+ * + * I saw a few advantages to this: + * + * 1. Convenient access for the implementations of MPD client protocol commands + * + * 2. Users could get sound analysis via the MPD client protocol without having + * to configure & enable an output plugin + * + * 3. General simplification-- the output plugin would only be responsible + * for serving the network protocol + * + * All that said, I discarded this approach. If I wanted the sound analysis to + * receive sound data post-cross-fade, post-replay gain and after any other + * filtering, it was going to need to own an AudioOutputSource instance. Thing + * is, when I open an AudioOutputSource I need: + * + * - the AudioFormat + * - a reference to the MusicPipe + * - the ReplayGain filter(s) + * - any other filters + * + * MultipleOutputs doesn't know these; it's just got a bunch of + * configuration. The configuration gets turned into these objects in + * FilteredAudioOutput::Setup() and it's non-trivial to do so. The plumbing is + * complex enough that I'm inclined to leave it where it is. So now we're at a + * point where SoundInfoCache would need to own both an AudioOutputSource \e and + * a FilteredAudioOutput... at which point it starts to look very much like an + * AudioOutputControl (in other words, just another audio output under + * MultipleOutputs). + * + * + */ + +/** + * \class VisualizationOutput + * + * \brief An output plugin that serves data useful for music visualizers + * + * \sa \ref vis_out_plugin_arch "Architecture" + * + * + * Both the fifo & pipe output plugins can be used to directly access the PCM + * audio data, and so can (and have been) used to implement music visualizers + * for MPD. They are, however, limited to clients running on the same host as + * MPD. This output plugin will stream PCM samples along with derived + * information useful for visualizers (the Fourier transform, bass/mids/trebs, + * and so forth) over one or more network connections, to allow true MPD client + * visualizers. + * + * + */ + +class VisualizationOutput: public AudioOutput { + + /* When the plugin is enabled, we actually "open" the server (which is + * to say, bind the socket & begin accepting incoming connections) */ + VisualizationServer server; + /* This will be null unless the plugin is open; it's a `shared_ptr` + * because we share references with the socket servers and the + * `VisualizationClient` instances representing active connections */ + std::shared_ptr pcache; + /// The number of seconds' worth of audio data to be cached + std::chrono::seconds cache_duration; + +public: + static AudioOutput* Create(EventLoop &event_loop, + const ConfigBlock &cfg_block) { + return new VisualizationOutput(event_loop, cfg_block); + } + VisualizationOutput(EventLoop &event_loop, + const ConfigBlock &cfg_block); + + virtual ~VisualizationOutput() override; // We have virtuals, so... + +public: + + //////////////////////////////////////////////////////////////////////// + // AudioOutput Interface // + //////////////////////////////////////////////////////////////////////// + + /** + * Enable the device. This may allocate resources, preparing + * for the device to be opened. + * + * Throws on error. + */ + virtual void Enable() override; + + /** + * Disables the device. It is closed before this method is called. + */ + virtual void Disable() noexcept override; + + /** + * Really open the device-- mandatory. + * + * Throws on error. 
+ * + * @param audio_format the audio format in which data is going + * to be delivered; may be modified by the plugin + */ + virtual void Open(AudioFormat &audio_format) override; + + /** + * Close the device-- mandatory. + */ + virtual void Close() noexcept override; + + /** + * Play a chunk of audio data-- mandatory. The method blocks until at + * least one audio frame is consumed. + * + * Throws on error. + * + * May throw #AudioOutputInterrupted after Interrupt() has + * been called. + * + * @return the number of bytes played (must be a multiple of + * the frame size) + */ + virtual size_t Play(std::span src) override; + +}; + +} // namespace Visualization + +using std::make_unique; + +const Domain vis_output_domain("vis_output"); + +Visualization::VisualizationOutput::VisualizationOutput( + EventLoop &event_loop, + const ConfigBlock &config_block): + AudioOutput(FLAG_ENABLE_DISABLE | FLAG_PAUSE), + server(event_loop, + config_block.GetBlockValue("bind_to_address"), + config_block.GetBlockValue("port", 8001U), + config_block.GetPositiveValue("max_clients", 0), + Visualization::SoundAnalysisParameters(config_block)), + cache_duration(config_block.GetPositiveValue("cache_duration", 1)) +{ } + +Visualization::VisualizationOutput::~VisualizationOutput() +{ } + +void +Visualization::VisualizationOutput::Enable() { + + FmtInfo(vis_output_domain, "VisualizationOutput::Enable({})", std::this_thread::get_id()); + + BlockingCall(server.GetEventLoop(), [this](){ + server.Open(); + }); + +} + +void +Visualization::VisualizationOutput::Disable() noexcept { + + FmtInfo(vis_output_domain, "VisualizationOutput::Disable({})", std::this_thread::get_id()); + + BlockingCall(server.GetEventLoop(), [this](){ + server.Close(); + }); + +} + +void +Visualization::VisualizationOutput::Open(AudioFormat &audio_format) +{ + FmtInfo(vis_output_domain, "VisualizationOutput::Open({})", std::this_thread::get_id()); + + /* At this point, we know the audio format, so we can at this point + * instantiate the PCM data cache. 
*/ + pcache = make_shared(audio_format, + cache_duration); + + BlockingCall(server.GetEventLoop(), [this, audio_format]() { + server.OnPluginOpened(pcache); + }); +} + +void +Visualization::VisualizationOutput::Close() noexcept +{ + FmtInfo(vis_output_domain, "VisualizationOutput::Close({})", std::this_thread::get_id()); + + BlockingCall(server.GetEventLoop(), [this]() { + server.OnPluginClosed(); + }); + + pcache = nullptr; +} + +size_t +Visualization::VisualizationOutput::Play(const std::span src) +{ + pcache->Add(src.data(), src.size()); + return src.size(); +} + +const struct AudioOutputPlugin visualization_output_plugin = { + "visualization", + nullptr, // cannot serve as the default output + &Visualization::VisualizationOutput::Create, + nullptr, // no particular mixer +}; diff --git a/src/output/plugins/visualization/VisualizationOutputPlugin.hxx b/src/output/plugins/visualization/VisualizationOutputPlugin.hxx new file mode 100644 index 0000000000..a2a9001462 --- /dev/null +++ b/src/output/plugins/visualization/VisualizationOutputPlugin.hxx @@ -0,0 +1,147 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +// Copyright The Music Player Daemon Project + +#ifndef MPD_VISUALIZATION_OUTPUT_PLUGIN_HXX +#define MPD_VISUALIZATION_OUTPUT_PLUGIN_HXX + +/** + * \page vis_out The Visualization Output Plugin + * + * \section vis_out_intro Introduction + * + * Unlike most output plugins, which provide sound data in one format or + * another, this plugin provides data \e derived from the current audio stream + * convenient for authors of + * music visualizers. + * + * \section vis_out_back Background + * + * This plugin started from a conversation on the #mpd IRC channel. I asked + * about the best way to implement a music visualizer as a remote MPD + * client. All of the MPD visualizers of which I was aware used the + * fifo + * output plugin and consequently had to be run on the same host as the MPD + * daemon. It was suggested that I write an output plugin that would stream the + * data needed to implement a visualizer. + * + * I submitted an + * RFC + * in which we kicked around the ideas of implementing the simplest protocol + * first, and of exposing sound information not only over a network protocol + * (like, say, the HttpdOutput plugin), but also over the MPD + * client protocol. + * + * This plugin is the result of those conversations. + * + * \subsection vis_out_prior Prior Art + * + * Music visualization sources which I consulted before settling on this approach: + * + * - This PR + * proposed solving the problem by implementing an output plugin that would + * stream the raw PCM data over TCP, the idea being that the remote visualizer + * would do the sound analysis client-side. The PR was discarded as being + * redundant with the \c HttpdOutput plugin. I would also observe that such a + * solution sends \e far more data on the wire than is needed for + * visualization. + * + * - ncmpcpp uses the + * FifoOutput plugin, and as such can only provide the visualization feature + * when it's being run locally. The sound analysis is limited, as well (on + * which more below). + * + * - cli-visualizer will + * work with the MPD FIFO output plugin (again assuming the MPD daemon is + * running locally). Limited sound analysis, as well. + * + * - MilkDrop: + * reading the source code was very instructive in terms of sound analysis for + * music visualization; that aspect of this plugin is largely based on it. 
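+ *
+ * For orientation before the details, here is a minimal configuration sketch;
+ * the key names below are the ones read by the implementation, while the
+ * values are merely illustrative:
+ *
+ \code
+
+   audio_output {
+       type            "visualization"
+       name            "my visualizer"
+       bind_to_address "any"
+       port            "8001"
+       max_clients     "0"
+       cache_duration  "1"
+   }
+
+ \endcode
+ *
+ * (port defaults to 8001; a max_clients of zero means "no limit"; and
+ * cache_duration is expressed in seconds.)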
+ *
+ *
+ * \section vis_out_plugin The Plugin
+ *
+ * A new output plugin "visualization" is provided. The plugin "plays" PCM data
+ * by caching it. It provides continuous sound analysis at some caller-supplied
+ * offset of the current song time consisting of PCM samples, Fourier
+ * coefficients, frequency information & so forth. Like \c HttpdOutput and
+ * \c SnapcastOutput, the plugin includes a socket server that will provide a
+ * network endpoint at which clients can access sound analysis. In the future,
+ * analysis may be made available over the MPD client protocol as well.
+ *
+ *
+ * \subsection vis_output_plugin_arch Architecture
+ *
+ * VisualizationOutput is like HttpdOutput and SnapcastOutput in that it
+ * implements both an AudioOutput and a socket server. Unlike those two
+ * implementations, I chose not to multiply inherit from AudioOutput &
+ * ServerSocket. There are more details \ref vis_out_arch "here", but briefly: I
+ * chose to have VisualizationOutput \e own a ServerSocket rather than \e be a
+ * ServerSocket, and pushed the responsibility for caching PCM data down into
+ * class SoundInfoCache on which both my output plugin & socket server
+ * depend. This arrangement is intended to both break-up circular dependencies
+ * among the classes involved as well as reduce the number of places in which
+ * objects are accessed by multiple threads.
+ *
+ *
+ * \subsection vis_output_plugin_analysis Sound Analysis
+ *
+ * Given audio data in raw PCM format, a number of steps may be taken to analyze
+ * that data & produce information useful to visualizer authors. This section
+ * describes the full pipeline briefly. Most of these steps are optional at
+ * request-time and are described in greater detail in the relevant docs.
+ *
+ * - the PCM data may optionally be damped by taking a weighted average between
+ *   the current values & prior values in the time domain; this will have the
+ *   effect of reducing noise in the higher frequency ranges
+ *
+ * - the PCM data may have a window function applied to it in the time domain
+ *   around the time of interest; such a function has the effect of "dialing
+ *   down" audio samples further from the timestamp of interest and again will
+ *   reduce higher-frequency noise; the size of the window may be configured to
+ *   incorporate more or less data as desired.
+ *
+ * - the resulting PCM data will be shifted into the frequency domain by
+ *   application of the Discrete Fourier Transform
+ *
+ * - the human ear can only distinguish frequencies from (about) 20Hz to
+ *   20000Hz, and in practice musical sound information doesn't show much
+ *   activity above 10000Hz; it is therefore convenient to throw out frequency
+ *   data outside some (configurable) frequency range
+ *
+ * - it is also convenient to divide the resulting spectrum into a few coarse
+ *   bands, such as bass/mids/trebs. This is computationally non-trivial because
+ *   perceptually, frequency is not linear, it's logarithmic. A change of one
+ *   octave corresponds to a doubling in frequency. Intuitively, this means that
+ *   the difference between 200 & 300Hz is much greater than the difference
+ *   between 5000 & 5100Hz, e.g. The plugin will perform this service for clients.
+ * + * - it can also be useful to maintain a weighted time average of the activity + * in each frequency range for purposes of beat detection + * + * + * \subsection vis_output_protocol The Protocol + * + * The specifics of sound analysis are defined in the plugin configuration & are + * identical for all clients. When clients connect, they provide the frame rate + * at which they would like to receive updates and the offset between + * client-side render time & server-side song time (to account for network lag, + * client-side buffering & the time needed to render each frame). Once that + * initial handshake is complete, the server will stream updates containing + * sound analysis results at regular intervals to the client. + * + * Note that each update need only be based on relatively few samples (Winamp, + * e.g. used 576). This will keep the data transferred on the wire small (at + * least by comparison to, say, the httpd output plugin which of course needs to + * send the entire song). Casting the protocol in terms of client-side FPS + * allows us to avoid a "request/response" protocol & simply stream until the + * client goes away. + * + * The protocol specification has its own \ref vis_out_protocol "page". + * + * + */ + +extern const struct AudioOutputPlugin visualization_output_plugin; + +#endif diff --git a/src/output/plugins/visualization/VisualizationServer.cxx b/src/output/plugins/visualization/VisualizationServer.cxx new file mode 100644 index 0000000000..337fc34eff --- /dev/null +++ b/src/output/plugins/visualization/VisualizationServer.cxx @@ -0,0 +1,106 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +// Copyright The Music Player Daemon Project + +#include "VisualizationServer.hxx" + +#include "Log.hxx" +#include "lib/fmt/ThreadIdFormatter.hxx" +#include "config/Block.hxx" +#include "util/Domain.hxx" + +using std::make_unique, std::move; + +const Domain vis_server_domain("vis_server"); + +Visualization::VisualizationServer::VisualizationServer( + EventLoop &event_loop, + const char *bind_to_address, + uint16_t port, + size_t max_clients_in, + const SoundAnalysisParameters ¶ms_in) +: ServerSocket(event_loop), + max_clients(max_clients_in), + reaper(event_loop, BIND_THIS_METHOD(ReapClients)), + sound_params(params_in) +{ + FmtInfo(vis_server_domain, "VisualizationServer::VisualizationServer(" + "{}:{}, {} clients maximum)", bind_to_address, port, + max_clients); + + ServerSocketAddGeneric(*this, bind_to_address, port); +} + +void +Visualization::VisualizationServer::ReapClients() noexcept +{ + FmtNotice(vis_server_domain, "VisualizationServer::ReapClients({}, " + "{} clients)", std::this_thread::get_id(), clients.size()); + + for (auto p0 = clients.begin(), p1 = clients.end(); p0 != p1; ) { + auto p = p0++; + if (p->IsClosed()) { + LogInfo(vis_server_domain, "Reaping closed client."); + clients.erase(p); + } + } + + if (!clients.empty()) { + LogInfo(vis_server_domain, "Scheduling another reaping in 3 " + "seconds."); + reaper.Schedule(std::chrono::seconds(3)); + } +} + +void +Visualization::VisualizationServer::OnPluginOpened( + const std::shared_ptr &pcache) +{ + state = HavePcmData{pcache }; + + for (auto p0 = clients.begin(), p1 = clients.end(); p0 != p1; ) { + auto p = p0++; + if (! p->IsClosed()) { + p->OnPluginOpened(pcache); + } + } +} + +void +Visualization::VisualizationServer::OnPluginClosed() +{ + state = std::monostate {}; + + for (auto p0 = clients.begin(), p1 = clients.end(); p0 != p1; ) { + auto p = p0++; + if (! 
p->IsClosed()) {
+			p->OnPluginClosed();
+		}
+	}
+
+}
+
+void
+Visualization::VisualizationServer::OnAccept(UniqueSocketDescriptor fd,
+					     SocketAddress /*address*/,
+					     int) noexcept
+{
+	FmtInfo(vis_server_domain, "VisualizationServer::OnAccept({})",
+		std::this_thread::get_id());
+
+	// Can we allow an additional client?
+	if (max_clients && clients.size() >= max_clients) {
+		FmtError(vis_server_domain, "Rejecting connection request; "
+			 "the maximum number of clients ({}) has already been "
+			 "reached.", max_clients);
+	} else {
+		if (state.index()) {
+			auto have_pcm_data = get<HavePcmData>(state);
+			clients.emplace_back(std::move(fd), GetEventLoop(),
+					     sound_params,
+					     have_pcm_data.pcache);
+		} else {
+			clients.emplace_back(std::move(fd), GetEventLoop(),
+					     sound_params);
+		}
+		reaper.Schedule(std::chrono::seconds(3));
+	}
+}
diff --git a/src/output/plugins/visualization/VisualizationServer.hxx b/src/output/plugins/visualization/VisualizationServer.hxx
new file mode 100644
index 0000000000..d8ebf79146
--- /dev/null
+++ b/src/output/plugins/visualization/VisualizationServer.hxx
@@ -0,0 +1,119 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+// Copyright The Music Player Daemon Project
+
+#ifndef VISUALIZATION_SERVER_HXX_INCLUDED
+#define VISUALIZATION_SERVER_HXX_INCLUDED 1
+
+#include "VisualizationClient.hxx"
+
+#include "SoundAnalysis.hxx"
+
+#include "config/Net.hxx"
+#include "event/CoarseTimerEvent.hxx"
+#include "event/ServerSocket.hxx"
+#include "net/SocketAddress.hxx"
+#include "net/UniqueSocketDescriptor.hxx"
+
+struct AudioFormat;
+struct ConfigBlock;
+
+namespace Visualization {
+
+class SoundInfoCache;
+
+/**
+ * \class VisualizationServer
+ *
+ * \brief A socket server handling visualization clients
+ *
+ * \sa \ref vis_out_arch "Architecture"
+ *
+ *
+ * This class handles binding one or more sockets & accepting incoming
+ * connections. For each such incoming connection, it will allocate a
+ * VisualizationClient instance to represent that client.
+ *
+ * The clients require both a PCM data cache and a SoundAnalysis instance to do
+ * their work. The former must be shared with the plugin that ultimately owns
+ * this class as well as the VisualizationClient instances, while the latter is
+ * cheaply copyable and so each client simply gets its own copy.
+ *
+ * The problem is that both must know the audio format in use (i.e. the number
+ * of samples per second and the number of channels), and that is only known
+ * when the plugin is "opened". 
Therefore this class can be represented by, yes,
+ * a finite state machine:
+ *
+ \code
+
+   Open --- OnPluginOpened() ---> HavePcmData
+    ^                                  |
+    |                                  |
+    +---- OnPluginClosed() ------------+
+
+ \endcode
+ *
+ * When a new client connection is opened:
+ *
+ * - if we are in state Open, we cannot provide the client with sound analysis
+ *   information nor a reference to the PCM cache
+ * - if we are in state HavePcmData, we can share a reference to our PCM cache
+ *   along with the salient information needed for sound analysis
+ *
+ * On state change:
+ *
+ * - from Open to HavePcmData, we can update all extant clients with a
+ *   shared reference to the PCM cache as well as the new sound analysis
+ *   information
+ * - from HavePcmData to Open, we need to tell all extant clients to
+ *   drop their PCM cache references, as well as their sound analysis
+ *   information
+ *
+ *
+ */
+
+class VisualizationServer : public ServerSocket {
+
+	/// only valid when the plugin is open
+	struct HavePcmData {
+		// I wish C++ had a `not_null` class
+		std::shared_ptr<SoundInfoCache> pcache;
+	};
+	/* Present state-- std::monostate means closed, HavePcmData means
+	 * opened (the plugin, that is) */
+	std::variant<std::monostate, HavePcmData> state;
+	/// maximum number of clients permitted; zero => unlimited
+	size_t max_clients;
+
+	/* Clients have both a reference to the PCM cache as well as a
+	 * SoundAnalysis instance while the plugin is opened. We'll create new
+	 * clients with our present state.
+	 * Nb. that VisualizationClient, being a BufferedSocket, is not
+	 * copy-constructible, and so must be emplaced. */
+	std::list<VisualizationClient> clients;
+	/// invoked periodically to clean-up dead clients
+	CoarseTimerEvent reaper;
+	// Audio analysis parameters
+	SoundAnalysisParameters sound_params;
+
+public:
+	VisualizationServer(EventLoop &event_loop, const char *bind_to_address,
+			    uint16_t port, size_t max_clients,
+			    const SoundAnalysisParameters &params);
+
+	void ReapClients() noexcept;
+	void OnPluginOpened(const std::shared_ptr<SoundInfoCache> &pcache);
+	void OnPluginClosed();
+
+protected:
+	/* Invoked by `ServerSocket`, on its event loop, when a new client connects
+	 *
+	 * \a fd is the file descriptor of our new socket, \a address is the
+	 * remote address, and \a uid is the effective UID of the client if \a
+	 * fd is a UNIX-domain socket */
+	virtual void OnAccept(UniqueSocketDescriptor fd, SocketAddress address,
+			      int uid) noexcept override;
+
+};
+
+} // namespace Visualization
+
+#endif // VISUALIZATION_SERVER_HXX_INCLUDED
diff --git a/src/pcm/AudioFormat.hxx b/src/pcm/AudioFormat.hxx
index a7781aa351..108182466b 100644
--- a/src/pcm/AudioFormat.hxx
+++ b/src/pcm/AudioFormat.hxx
@@ -129,6 +129,10 @@ struct AudioFormat {
 	 */
 	unsigned GetFrameSize() const noexcept;
 
+	uint32_t GetSampleRate() const noexcept {
+		return sample_rate;
+	}
+
 	template<typename D>
 	constexpr auto TimeToFrames(D t) const noexcept {
 		using Period = typename D::period;
diff --git a/test/TestVisualization.cxx b/test/TestVisualization.cxx
new file mode 100644
index 0000000000..20c1c75703
--- /dev/null
+++ b/test/TestVisualization.cxx
@@ -0,0 +1,953 @@
+/*
+ * Copyright 2003-2022 The Music Player Daemon Project
+ * http://www.musicpd.org
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include "output/plugins/visualization/SoundAnalysis.hxx" +#include "output/plugins/visualization/SoundInfoCache.hxx" +#include "output/plugins/visualization/Protocol.hxx" +#include "util/ByteOrder.hxx" + +#include +#include +#include + +#include +#include +#include +#include + +using namespace Visualization; + +// "Smoke test" for SoundInfoCache +TEST(VisualizationTest, SoundInfoCacheSmoke) +{ + using namespace std; + using namespace std::chrono; + + // Validate a few assumptions I'm making about the API + AudioFormat std_fmt(44100, SampleFormat::S16, 2); + EXPECT_EQ(std_fmt.TimeToSize(seconds(1)), 44100 * 2 * 2); + EXPECT_TRUE(std_fmt.IsFullyDefined()); + EXPECT_TRUE(std_fmt.IsValid()); + EXPECT_EQ(std_fmt.GetFrameSize(), 4); + EXPECT_EQ(std_fmt.GetSampleRate(), 44100); + + // Whip-up an unrealistic, but easy-to-reason-about audio format for testing + // purposes: 1Hz, mono, samples are signed bytes + AudioFormat fmt(1, SampleFormat::S8, 1); + EXPECT_TRUE(fmt.IsFullyDefined()); + EXPECT_TRUE(fmt.IsValid()); + + { + // Silly case-- a cache that can handle exactly three samples + Visualization::SoundInfoCache cache(fmt, seconds(3)); + // Add 2 seconds' worth of data + int8_t data[] = { 1, 2 }; + cache.Add(data, sizeof(data)); + + // I now expect to have the following in my three-slot ring buffer: + // + // +---+---+---+ + // | 1 | 2 | | + // +---+---+---+ + // ^ ^ + // p0 p1 + + EXPECT_EQ(cache.Size(), 2); + + int8_t buf[3]; + bool status = cache.GetFromBeginning(2, buf, sizeof(buf)); + EXPECT_TRUE(status); + EXPECT_EQ(buf[0], 1); + EXPECT_EQ(buf[1], 2); + + data[0] = 3; data[1] = 4; + cache.Add(data, sizeof(data)); + + // I now expect to have the following in my three-slot ring buffer: + // + // +---+---+---+ + // | 4 | 2 | 3 | + // +---+---+---+ + // ^ + // p0,p1 + + EXPECT_EQ(cache.Size(), 3); + + status = cache.GetFromBeginning(3, buf, sizeof(buf)); + EXPECT_TRUE(status); + EXPECT_EQ(buf[0], 2); + EXPECT_EQ(buf[1], 3); + EXPECT_EQ(buf[2], 4); + + data[0] = 5; + cache.Add(data, 1); + + // I now expect to have the following in my three-slot ring buffer: + // + // +---+---+---+ + // | 4 | 5 | 3 | + // +---+---+---+ + // ^ + // p0,p1 + + EXPECT_EQ(cache.Size(), 3); + + status = cache.GetFromBeginning(3, buf, sizeof(buf)); + EXPECT_TRUE(status); + EXPECT_EQ(buf[0], 3); + EXPECT_EQ(buf[1], 4); + EXPECT_EQ(buf[2], 5); + + int8_t data3[] = { 6, 7, 8 }; + cache.Add(data3, 3); + + // I now expect to have the following in my three-slot ring buffer: + // + // +---+---+---+ + // | 7 | 8 | 6 | + // +---+---+---+ + // ^ + // p0,p1 + + EXPECT_EQ(cache.Size(), 3); + + status = cache.GetFromBeginning(3, buf, sizeof(buf)); + EXPECT_TRUE(status); + EXPECT_EQ(buf[0], 6); + EXPECT_EQ(buf[1], 7); + EXPECT_EQ(buf[2], 8); + + int8_t data4[] = { 9, 10, 11, 12 }; + cache.Add(data4, 4); + + // I now expect to have the following in my three-slot ring buffer: + // + // +----+----+----+ + // | 10 | 11 | 12 | + // +----+----+----+ + // ^ + // p0,p1 + + EXPECT_EQ(cache.Size(), 3); + + status = cache.GetFromBeginning(3, buf, sizeof(buf)); + 
EXPECT_TRUE(status); + EXPECT_EQ(buf[0], 10); + EXPECT_EQ(buf[1], 11); + EXPECT_EQ(buf[2], 12); + } +} + +// Test SoundInfoCache WRT timing +TEST(VisualizationTest, SoundInfoCacheTiming) +{ + using namespace std; + using namespace std::chrono; + + // Whip-up an unrealistic, but easy-to-reason-about audio format for testing purposes: + // 1Hz, mono, samples are signed bytes (i.e. 1 byte per sample + AudioFormat fmt(1, SampleFormat::S8, 1); + EXPECT_TRUE(fmt.IsFullyDefined()); + + // Silly case-- a cache that can handle exactly three samples + Visualization::SoundInfoCache cache(fmt, seconds(3)); + // Add 2 seconds' worth of data + int8_t data[] = { 1, 2 }; + cache.Add(data, sizeof(data)); + + // I now expect to have the following in my three-slot ring buffer: + // + // +---+---+---+ + // | 1 | 2 | | + // +---+---+---+ + // ^ ^ + // p0 p1 + // t0 t1 = t0 + 2 seconds + // + // I don't know what t0 is (it will be different every time this test is + // run), but t1 should be two seconds later than t0. + Visualization::SoundInfoCache::Time t0, t1; + tie(t0, t1) = cache.Range(); + EXPECT_EQ(t1 - t0, seconds(2)); + + int8_t buf[3]; + bool status = cache.GetByTime(2, t1, buf, sizeof(buf)); + EXPECT_TRUE(status); + + EXPECT_EQ(buf[0], 1); + EXPECT_EQ(buf[1], 2); + + // Add 1 second's worth of data + data[0] = 3; + cache.Add(data, 1); + + // I now expect to have the following in my three-slot ring buffer: + // + // +---+---+---+ + // | 1 | 2 | 3 | + // +---+---+---+ + // ^ + // p0, p1 + // t0 + // t1 = t0 + 3 seconds + // + // I don't know what t0 is (it will be different every time this test is + // run), but t1 should be three seconds later than t0. + tie(t0, t1) = cache.Range(); + EXPECT_EQ(t1 - t0, seconds(3)); + + status = cache.GetByTime(3, t1, buf, sizeof(buf)); + EXPECT_TRUE(status); + + EXPECT_EQ(buf[0], 1); + EXPECT_EQ(buf[1], 2); + EXPECT_EQ(buf[2], 3); + + // Add 1 second's worth of data + data[0] = 4; + cache.Add(data, 1); + + // I now expect to have the following in my three-slot ring buffer: + // + // +---+---+---+ + // | 4 | 2 | 3 | + // +---+---+---+ + // ^ + // p0, p1 + // t0 + // t1 = t0 + 3 seconds + // + // I don't know what t0 is (it will be different every time this test is + // run), but t1 should be three seconds later than t0. + tie(t0, t1) = cache.Range(); + EXPECT_EQ(t1 - t0, seconds(3)); + + status = cache.GetByTime(3, t1, buf, sizeof(buf)); + EXPECT_TRUE(status); + + EXPECT_EQ(buf[0], 2); + EXPECT_EQ(buf[1], 3); + EXPECT_EQ(buf[2], 4); + + // Add another second's worth of data + data[0] = 5; + cache.Add(data, 1); + + // I now expect to have the following in my three-slot ring buffer: + // + // +---+---+---+ + // | 4 | 5 | 3 | + // +---+---+---+ + // ^ + // p0, p1 + // t0 + // t1 = t0 + 3 seconds + // + // I don't know what t0 is (it will be different every time this test is + // run), but t1 should be three seconds later than t0. 
+ tie(t0, t1) = cache.Range(); + EXPECT_EQ(t1 - t0, seconds(3)); + + // Add 2 seconds' worth of data + data[0] = 6; data[1] = 7; + cache.Add(data, 2); + + // I now expect to have the following in my three-slot ring buffer: + // + // +---+---+---+ + // | 7 | 5 | 6 | + // +---+---+---+ + // ^ + // p0, p1 + // t0 + // t1 = t0 + 3 seconds + + tie(t0, t1) = cache.Range(); + EXPECT_EQ(t1 - t0, seconds(3)); // 3 secs in the buffer + + // Ask for two samples, ending at `t1` + status = cache.GetByTime(2, t1, buf, sizeof(buf)); + EXPECT_TRUE(status); + + EXPECT_EQ(buf[0], 6); + EXPECT_EQ(buf[1], 7); + + // Let's try fractions-- at this point, we've got 3 second's worth of + // data in the cache, from [t0, t1 = t0 + 3 seconds). + + // What happens if we ask for two samples, ending at t0 + 2500ms? + // TODO(sp1ff): DEBUG + auto d = milliseconds{2500}; // Should be 2500ms = 2.5sec + auto t = t0 + d; + + status = cache.GetByTime(3, t /*t0 + milliseconds{2500}*/, buf, sizeof(buf)); + EXPECT_TRUE(status); + EXPECT_EQ(buf[0], 5); + EXPECT_EQ(buf[1], 6); + EXPECT_EQ(buf[2], 7); + + status = cache.GetByTime(2, t0 + milliseconds(1500), buf, sizeof(buf)); + EXPECT_TRUE(status); + EXPECT_EQ(buf[0], 5); + EXPECT_EQ(buf[1], 6); + + status = cache.GetByTime(1, t0 + milliseconds(500), buf, sizeof(buf)); + EXPECT_TRUE(status); + EXPECT_EQ(buf[0], 5); + + // Negative tests-- what happens if I ask for _two_ samples at t0 + 500ms-- + // we can't satisfy that request + status = cache.GetByTime(2, t0 + milliseconds(500), buf, sizeof(buf)); + EXPECT_FALSE(status); + + // What if I ask for even one sample at t1 + 1ms + status = cache.GetByTime(1, t1 + milliseconds(1), buf, sizeof(buf)); + EXPECT_FALSE(status); +} + +// Exercise SoundInfoCache on a more realistic waveform +TEST(VisualizationTest, Waveform) +{ + using namespace std; + using namespace std::chrono; + + const double TWO_PI = 6.283185307179586476925286766559; + + // Let's generate a waveform for a 1Hz sine wave, sampled at 44100 samples + // per second. Using format 44100:16:2, that's just over 172Kb (i.e. not too + // bad). + AudioFormat fmt(44100, SampleFormat::S16, 2); + EXPECT_TRUE(fmt.IsFullyDefined()); + + int16_t buf[44100 * 2]; + for (int i = 0; i < 44100; ++i) { + double t = (double)i / 44100.0; + int16_t v = (int16_t) (sin(TWO_PI * t) * 32767.0); + buf[i * 2] = buf[i * 2 + 1] = v; + } + + // Create a `SoundInfoCache` instance that can hold 5 seconds' worth of + // such data... + Visualization::SoundInfoCache cache(fmt, seconds(5)); + // and add 6 seconds' worth of data to it. + cache.Add(buf, sizeof(buf)); + Visualization::SoundInfoCache::Time t0, t1; + tie(t0, t1) = cache.Range(); + EXPECT_EQ(t1 - t0, seconds(1)); + cache.Add(buf, sizeof(buf)); + cache.Add(buf, sizeof(buf)); + cache.Add(buf, sizeof(buf)); + cache.Add(buf, sizeof(buf)); + cache.Add(buf, sizeof(buf)); + + // I should now have five seconds' worth of data in the cache. + Visualization::SoundInfoCache::Time t2, t3; + tie(t2, t3) = cache.Range(); + EXPECT_EQ(t3 - t0, seconds(6)); + + // But we're at "song time" = 6 seconds + bool status = cache.GetByTime(100, t0 + seconds(6), buf, sizeof(buf)); + EXPECT_TRUE(status); + + // `buf[0:100]` should now contain the *last* 100 samples + for (int i = 0; i < 100; ++i) { + EXPECT_EQ(buf[2*i], buf[88000 + 2*i]); + } +} + +/** + * \page vis_out_trivial_sample Trivial Waveforms for Testing Purposes + * + * \section vis_out_trivial_sample_intro Introduction + * + * Derivation of a trivial DFT for testing purposes. 
+ * + * \section vis_out_trivial_sample_derivation Derivation + * + * Consider the waveform: + * + \code + 1 + f(x) = sin(x) + - cos(2x) + 2 + \endcode + * + * This function has a (continuous) Fourier transform of: + * + \code + 1 1 + - pi d(w - 2) - i pi d(w - 1) + i pi d(w + 1) + - pi d(w + 2) + 2 2 + \endcode + * + * where \c d denotes the dirac delta function and \c w represents the angular + * momentum. This makes sense: the frequency domain has "spikes" at frequencies + * of 1 & 2 (corresponding to the sin & cos arguments, respectively), and the + * "burst" at a frequency of 1 is twice as strong as that at 2 (corresponding to + * the sin & cos coefficients, resp.). + * + * Let's add a second waveform (so we can simulate stereo): + * + \code + 1 + g(x) = sin(2x) + - cos(4x) + 4 + \endcode + * + * The Fourier transform of \c g is: + * + \code + 1 1 + - pi d(w-4) - i pi d(w-2) + i pi d(w+2) + - pi d(w+4) + 4 4 + \endcode + * + * Similarly: we see spikes at 2 & 4, with the spike at 2 four times the size of + * the spike at 4. + * + * \subsection vis_out_trivial_sample_derivation_octave Gnu Octave Code + * + \code + + octave:1> pkg load symbolic + octave:2> syms x + octave:3> f = sin (x) + 1/2 * cos (2*x) + octave:4> fourier (f) + ans = (sym) + + π⋅δ(w - 2) π⋅δ(w + 2) + ────────── - ⅈ⋅π⋅δ(w - 1) + ⅈ⋅π⋅δ(w + 1) + ────────── + 2 2 + octave:5> g = sin (2*x) + 1/4 * cos (4*x) + octave:6> fourier (g) + ans = (sym) + π⋅δ(w - 4) π⋅δ(w + 4) + ────────── - ⅈ⋅π⋅δ(w - 2) + ⅈ⋅π⋅δ(w + 2) + ────────── + 4 4 + \endcode + * + * \subsection vis_out_trivial_sample_derivation_wolfram Wolfram Language + * + \code + + FourierTransform[Sin[x]+1/2 Cos[2x],x, \[Omega], FourierParameters -> {1,-1}] + = 1/2 \[Pi] DiracDelta[-2+\[Omega]]-I \[Pi] DiracDelta[-1+\[Omega]]+I \[Pi] DiracDelta[1+\[Omega]]+1/2 \[Pi] DiracDelta[2+\[Omega]] + + FourierTransform[Sin[2x]+1/4 Cos[4x],x, \[Omega], FourierParameters -> {1,-1}] + = 1/4 \[Pi] DiracDelta[-4 + \[Omega]] - + I \[Pi] DiracDelta[-2 + \[Omega]] + + I \[Pi] DiracDelta[2 + \[Omega]] + 1/4 \[Pi] DiracDelta[4 + \[Omega]] + + \endcode + * + * \subsection vis_out_trivial_sample_dfts Discrete Fourier Transforms + * + * Let's sample these waveforms at 5 points over the range 0 to 2Pi: that's far + * too low a sampling rate to see much of anything, but it \em is simple enough + * that we can compute the discrete Fourier tranform by hand for testing + * purposes (we'll use a more realistic sampling rate later; right now we just + * want to check our basic calculations). + * + * At the same time, for convenience, let's introduce a transformation so that + * we can tell the codebase that we're sampling once per second (since 2*pi/5 is + * around 1.2566 and AudioFormat only accepts integers for the sample rate). + * Let x = pi * u /2, and we'll work in terms of u: + * + \code + + i u x = u * pi/2 f(y) g(y) + -- - ------------ ---- ---- + 0 0 sec 0 1/2 1/4 + 1 1 Pi/2 1/2 1/4 + 2 2 Pi 1/2 1/4 + 3 3 3*Pi/2 -3/2 1/4 + 4 4 2*Pi 1/2 1/4 + + \endcode + * + * \subsubsection vis_out_trivial_sample_f + * + * Let's work out the Fourier coefficients "by hand". 
Let the k-th discrete + * Fourier coefficient for f be Y(k) and let the summing index for each + * coefficient be k: + * + \code + + k j => 0 1 2 3 4 + | + v 1 -2pi*0*0*i/5 1 -2pi*1*0*i/5 1 -2pi*2*0*i/5 3 -2pi*3*0*i/5 1 -2pi*4*0*i/5 + Y = - e + - e + - e - - e + - e + 0 0 2 2 2 2 2 + + 1 -2pi*0*1*i/5 1 -2pi*1*1*i/5 1 -2pi*2*1*i/5 3 -2pi*3*1*i/5 1 -2pi*4*1*i/5 + 1 Y = - e + - e + - e - - e + - e + 1 2 2 2 2 2 + + 1 -2pi*0*2*i/5 1 -2pi*1*2*i/5 1 -2pi*2*2*i/5 3 -2pi*3*2*i/5 1 -2pi*4*2*i/5 + 2 Y = - e + - e + - e - - e + - e + 2 2 2 2 2 2 + + 1 -2pi*0*3*i/5 1 -2pi*1*3*i/5 1 -2pi*2*3*i/5 3 -2pi*3*3*i/5 1 -2pi*4*3*i/5 + 3 Y = - e + - e + - e - - e + - e + 3 2 2 2 2 2 + + 1 -2pi*0*4*i/5 1 -2pi*1*4*i/5 1 -2pi*2*4*i/5 3 -2pi*3*4*i/5 1 -2pi*4*4*i/5 + 4 Y = - e + - e + - e - - e + - e + 4 2 2 2 2 2 + + \endcode + * + * OK-- time to let Octave take over: + * + \code + + vpa(1/sym(2)*exp(-sym(2)*sym(pi)*0* 0 *i/sym(5)) + 1/sym(2)*exp(-sym(2)*sym(pi)*1* 0 *i/sym(5)) + 1/sym(2)*exp(-sym(2)*sym(pi)*sym(2)* 0 *i/sym(5)) - sym(3)/sym(2)*exp(-sym(2)*sym(pi)*sym(3)* 0* i/sym(5)) + 1/sym(2)*exp(-sym(2)*sym(pi)*sym(4)* 0 *i/sym(5))) + vpa(1/sym(2)*exp(-sym(2)*sym(pi)*0* 1 *i/sym(5)) + 1/sym(2)*exp(-sym(2)*sym(pi)*1* 1 *i/sym(5)) + 1/sym(2)*exp(-sym(2)*sym(pi)*sym(2)* 1 *i/sym(5)) - sym(3)/sym(2)*exp(-sym(2)*sym(pi)*sym(3)* 1* i/sym(5)) + 1/sym(2)*exp(-sym(2)*sym(pi)*sym(4)* 1 *i/sym(5))) + vpa(1/sym(2)*exp(-sym(2)*sym(pi)*0*sym(2)*i/sym(5)) + 1/sym(2)*exp(-sym(2)*sym(pi)*1*sym(2)*i/sym(5)) + 1/sym(2)*exp(-sym(2)*sym(pi)*sym(2)*sym(2)*i/sym(5)) - sym(3)/sym(2)*exp(-sym(2)*sym(pi)*sym(3)*sym(2)*i/sym(5)) + 1/sym(2)*exp(-sym(2)*sym(pi)*sym(4)*sym(2)*i/sym(5))) + vpa(1/sym(2)*exp(-sym(2)*sym(pi)*0*sym(3)*i/sym(5)) + 1/sym(2)*exp(-sym(2)*sym(pi)*1*sym(3)*i/sym(5)) + 1/sym(2)*exp(-sym(2)*sym(pi)*sym(2)*sym(3)*i/sym(5)) - sym(3)/sym(2)*exp(-sym(2)*sym(pi)*sym(3)*sym(3)*i/sym(5)) + 1/sym(2)*exp(-sym(2)*sym(pi)*sym(4)*sym(3)*i/sym(5))) + vpa(1/sym(2)*exp(-sym(2)*sym(pi)*0*sym(4)*i/sym(5)) + 1/sym(2)*exp(-sym(2)*sym(pi)*1*sym(4)*i/sym(5)) + 1/sym(2)*exp(-sym(2)*sym(pi)*sym(2)*sym(4)*i/sym(5)) - sym(3)/sym(2)*exp(-sym(2)*sym(pi)*sym(3)*sym(4)*i/sym(5)) + 1/sym(2)*exp(-sym(2)*sym(pi)*sym(4)*sym(4)*i/sym(5))) + + ans = (sym) 0.5000000000000000000000000000000 + ans = (sym) 1.6180339887498948482045868343656 - 1.1755705045849462583374119092781⋅ⅈ + ans = (sym) -0.61803398874989484820458683436564 + 1.9021130325903071442328786667588⋅ⅈ + ans = (sym) -0.61803398874989484820458683436564 - 1.9021130325903071442328786667588⋅ⅈ + ans = (sym) 1.6180339887498948482045868343656 + 1.1755705045849462583374119092781⋅ⅈ + + \endcode + * + * Let's confirm with Mathematica: + * + \code + + In[5]:= Fourier[{1/2,1/2,1/2,-3/2,1/2}, FourierParameters -> {1,-1}] + Out[5]= {0.5 +0. 
I,1.61803 -1.17557 I,-0.618034+1.90211 I,-0.618034-1.90211 I,1.61803 +1.17557 I} + + \endcode + * + * \subsubsection vis_out_trivial_sample_g + * + \code + + k j => 0 1 2 3 4 + | + v 1 -2pi*0*0*i/5 1 -2pi*1*0*i/5 1 -2pi*2*0*i/5 1 -2pi*3*0*i/5 1 -2pi*4*0*i/5 + Y = - e + - e + - e + - e + - e + 0 0 4 4 4 4 4 + + 1 -2pi*0*1*i/5 1 -2pi*1*1*i/5 1 -2pi*2*1*i/5 1 -2pi*3*1*i/5 1 -2pi*4*1*i/5 + 1 Y = - e + - e + - e + - e + - e + 1 4 4 4 4 4 + + 1 -2pi*0*2*i/5 1 -2pi*1*2*i/5 1 -2pi*2*2*i/5 1 -2pi*3*2*i/5 1 -2pi*4*2*i/5 + 2 Y = - e + - e + - e + - e + - e + 2 4 4 4 4 4 + + 1 -2pi*0*3*i/5 1 -2pi*1*3*i/5 1 -2pi*2*3*i/5 1 -2pi*3*3*i/5 1 -2pi*4*3*i/5 + 3 Y = - e + - e + - e + - e + - e + 3 4 4 4 4 4 + + 1 -2pi*0*4*i/5 1 -2pi*1*4*i/5 1 -2pi*2*4*i/5 1 -2pi*3*4*i/5 1 -2pi*4*4*i/5 + 4 Y = - e + - e + - e + - e + - e + 4 4 4 4 4 4 + + \endcode + * + \code + + vpa(1/sym(4)*exp(-sym(2)*sym(pi)*0* 0 *i/sym(5)) + 1/sym(4)*exp(-sym(2)*sym(pi)*1* 0 *i/sym(5)) + 1/sym(4)*exp(-sym(2)*sym(pi)*sym(2)* 0 *i/sym(5)) + 1/sym(4)*exp(-sym(2)*sym(pi)*sym(3)* 0* i/sym(5)) + 1/sym(4)*exp(-sym(2)*sym(pi)*sym(4)* 0 *i/sym(5))) + vpa(1/sym(4)*exp(-sym(2)*sym(pi)*0* 1 *i/sym(5)) + 1/sym(4)*exp(-sym(2)*sym(pi)*1* 1 *i/sym(5)) + 1/sym(4)*exp(-sym(2)*sym(pi)*sym(2)* 1 *i/sym(5)) + 1/sym(4)*exp(-sym(2)*sym(pi)*sym(3)* 1* i/sym(5)) + 1/sym(4)*exp(-sym(2)*sym(pi)*sym(4)* 1 *i/sym(5))) + vpa(1/sym(4)*exp(-sym(2)*sym(pi)*0*sym(2)*i/sym(5)) + 1/sym(4)*exp(-sym(2)*sym(pi)*1*sym(2)*i/sym(5)) + 1/sym(4)*exp(-sym(2)*sym(pi)*sym(2)*sym(2)*i/sym(5)) + 1/sym(4)*exp(-sym(2)*sym(pi)*sym(3)*sym(2)*i/sym(5)) + 1/sym(4)*exp(-sym(2)*sym(pi)*sym(4)*sym(2)*i/sym(5))) + vpa(1/sym(4)*exp(-sym(2)*sym(pi)*0*sym(3)*i/sym(5)) + 1/sym(4)*exp(-sym(2)*sym(pi)*1*sym(3)*i/sym(5)) + 1/sym(4)*exp(-sym(2)*sym(pi)*sym(2)*sym(3)*i/sym(5)) + 1/sym(4)*exp(-sym(2)*sym(pi)*sym(3)*sym(3)*i/sym(5)) + 1/sym(4)*exp(-sym(2)*sym(pi)*sym(4)*sym(3)*i/sym(5))) + vpa(1/sym(4)*exp(-sym(2)*sym(pi)*0*sym(4)*i/sym(5)) + 1/sym(4)*exp(-sym(2)*sym(pi)*1*sym(4)*i/sym(5)) + 1/sym(4)*exp(-sym(2)*sym(pi)*sym(2)*sym(4)*i/sym(5)) + 1/sym(4)*exp(-sym(2)*sym(pi)*sym(3)*sym(4)*i/sym(5)) + 1/sym(4)*exp(-sym(2)*sym(pi)*sym(4)*sym(4)*i/sym(5))) + + ans = (sym) 1.2500000000000000000000000000000 + ans = (sym) 0.e-142 + 0.e-142⋅ⅈ + ans = (sym) 0.e-142 + 0.e-142⋅ⅈ + ans = (sym) 0.e-142 + 0.e-142⋅ⅈ + ans = (sym) 0.e-142 + 0.e-142⋅ⅈ + + \endcode + * + * Again, let's confirm with Mathematica: + * + \code + + In[6]:= Fourier[{1/4,1/4,1/4,1/4,1/4}, FourierParameters -> {1,-1}] + Out[6]= {1.25,5.55112*10^-17,5.55112*10^-17,5.55112*10^-17,5.55112*10^-17} + + \endcode + * + * + */ + +// Read the four bytes at \p as a float in the network protocol +inline float float_at(std::byte *p, size_t i) { + uint32_t as_uint = FromBE32(*(uint32_t*)(p + 4*i)); + return *(float*)&as_uint; +} + +// Test SoundAnalaysis against a trivial DFT +TEST(VisualizationTest, TrivialDft) +{ + using namespace std::chrono; + + // Let's represent our wave form as IEEE 754 single precisions floats, + // sampled once per second, with two channels (i.e. stereo). + AudioFormat fmt(1, SampleFormat::FLOAT, 2); + + // Sanity check-- 20 bytes is 5 samples, which should be five seconds' + // worth. 
Double for the two channels + Visualization::SoundInfoCache::Duration us = fmt.SizeToTime(40); + EXPECT_EQ(us, seconds(5)); + + constexpr float samples[10] = { 0.5, 0.25, 0.5, 0.25, 0.5, 0.25, -1.5, 0.25, 0.5, 0.25 }; + std::shared_ptr pcache = + std::make_unique(fmt, seconds(6)); // six seconds' capacity, just so we + // don't need to worry + pcache->Add(samples, sizeof(samples)); + EXPECT_EQ(pcache->Size(), 40); + + Visualization::SoundInfoCache::Time t0, t1; + std::tie(t0, t1) = pcache->Range(); + // `t0` is whatever time the first sample was added; what we know is that + // `t1` should be five seconds later. + auto d = t1 - t0; + EXPECT_EQ(d, seconds(5)) << "t0 is " << t0 << ", t1 is " << t1 << ", d is " << d; + + // For each channel, we'll get back five Fourier coefficients, corresponding + // to the frequencies 0Hz, 1/5Hz, 2/5, 3/5 & 4/5. Let's pick cuttoffs that + // will discard the highest & the lowest, just for testing purposes. + SoundAnalysisParameters params { 5, 0.25, 0.75 }; + SoundAnalysis analysis(params, pcache); + + EXPECT_EQ(2, analysis.NumChan()); + EXPECT_EQ(5, analysis.NumSamp()); + EXPECT_EQ(3, analysis.NumFreq()); + + EXPECT_TRUE(analysis.Update(t1)); + + // Three coefficients per channel, two channels + fftwf_complex coeffs[6]; + analysis.GetCoeffs(coeffs, sizeof(coeffs)); + + EXPECT_FLOAT_EQ(coeffs[0][0], 0.5); + EXPECT_FLOAT_EQ(coeffs[0][1], 0.0); + EXPECT_FLOAT_EQ(coeffs[1][0], 1.6180339887498948482045868343656); + EXPECT_FLOAT_EQ(coeffs[1][1], -1.1755705045849462583374119092781); + EXPECT_FLOAT_EQ(coeffs[2][0], -0.61803398874989484820458683436564); + EXPECT_FLOAT_EQ(coeffs[2][1], 1.9021130325903071442328786667588); + EXPECT_FLOAT_EQ(coeffs[3][0], 1.25); + EXPECT_FLOAT_EQ(coeffs[3][1], 0.0); + EXPECT_FLOAT_EQ(coeffs[4][0], 0.0); + EXPECT_FLOAT_EQ(coeffs[4][0], 0.0); + EXPECT_FLOAT_EQ(coeffs[5][0], 0.0); + EXPECT_FLOAT_EQ(coeffs[5][0], 0.0); + + // bass/mids/trebs: 0/2/4 (left) + // bass/mids/trebs: 0/0/0 (right) + + float bmt[6]; + EXPECT_TRUE(analysis.GetBassMidsTrebs(bmt, 6)); + + EXPECT_FLOAT_EQ(bmt[0], 0.0); + EXPECT_FLOAT_EQ(bmt[1], 2.0); + EXPECT_FLOAT_EQ(bmt[2], 4.0); + EXPECT_FLOAT_EQ(bmt[3], 0.0); + EXPECT_FLOAT_EQ(bmt[4], 0.0); + EXPECT_FLOAT_EQ(bmt[5], 0.0); + + // Serialization: + // + // +----------+----------+-------------+-----------+----------+---------+---------+----------+------------+---------------+-----------------+ + // | num_samp | num_chan | sample_rate | waveforms | num_freq | freq_lo | freq_hi | freq_off | coeffs | power_spectra | bass/mids/trebs | + // | -------- | -------- | ----------- | --------- | -------- | ------- | ------- | -------- | ---------- | ------------- | --------------- | + // | uint16_t | uint8_t | uint16_t | see below | uint16_t | float | float | uint16_t | see below | see below | see below | + // | 0005 | 02 | 0001 | | 003 | 0.25 | 0.75 | 0001 | | | | + // +----------+----------+-------------+-----------+----------+---------+---------+----------+------------+---------------+-----------------+ + // 2 1 2 40 2 4 4 2 48 24 24 + // 153 octets, total + + // waveforms: + // chan 0: 0.5, 0.5 0.5 -1.5, 0.5 + // chan 1: 0.25 0.25 0.25, 0.25, 0.25 + + // coeffs: + // chan 0: (1.6180339887498948482045868343656, -1.1755705045849462583374119092781), (-0.61803398874989484820458683436564, 1.9021130325903071442328786667588) (-0.61803398874989484820458683436564, 1.9021130325903071442328786667588) + // chan 1: (0.0, 0.0) (0.0, 0.0) (0.0, 0.0) + + // spectra: + // chan 0: 2, 2, 2 + // chan 1: 0, 0, 0 + + std::byte buf[153]; + 
std::byte *p1 = analysis.SerializeSoundInfoFramePayload(buf); + std::byte *p0 = buf; + EXPECT_EQ(p1, p0 + 153); + + EXPECT_EQ(FromBE16(*(uint16_t*)p0), 5); p0 += 2; // num_samp := 5 + EXPECT_EQ(*p0, (std::byte)2); p0 += 1; // num_chan := 2 + EXPECT_EQ(FromBE16(*(uint16_t*)p0), 1); p0 += 2; // sample_rate := 1 + + // waveform, channel 0 + EXPECT_FLOAT_EQ(float_at(p0, 0), 0.5); + EXPECT_FLOAT_EQ(float_at(p0, 1), 0.5); + EXPECT_FLOAT_EQ(float_at(p0, 2), 0.5); + EXPECT_FLOAT_EQ(float_at(p0, 3), -1.5); + EXPECT_FLOAT_EQ(float_at(p0, 4), 0.5); + p0 += 20; + + // waveform, channel 1 + EXPECT_FLOAT_EQ(float_at(p0, 0), 0.25); + EXPECT_FLOAT_EQ(float_at(p0, 1), 0.25); + EXPECT_FLOAT_EQ(float_at(p0, 2), 0.25); + EXPECT_FLOAT_EQ(float_at(p0, 3), 0.25); + EXPECT_FLOAT_EQ(float_at(p0, 4), 0.25); + p0 += 20; + + EXPECT_EQ(FromBE16(*(uint16_t*)p0), 3); p0 += 2; // num_freq := 3 + + EXPECT_FLOAT_EQ(float_at(p0, 0), 0.25); // freq_lo + EXPECT_FLOAT_EQ(float_at(p0, 1), 0.75); // freq_hi + p0 += 8; + + EXPECT_EQ(FromBE16(*(uint16_t*)p0), 1); p0 += 2; // freq_off + + // coefficients, channel 0 + EXPECT_FLOAT_EQ(float_at(p0, 0), 1.6180339887498948482045868343656); + EXPECT_FLOAT_EQ(float_at(p0, 1), -1.1755705045849462583374119092781); + EXPECT_FLOAT_EQ(float_at(p0, 2), -0.61803398874989484820458683436564); + EXPECT_FLOAT_EQ(float_at(p0, 3), 1.9021130325903071442328786667588); + EXPECT_FLOAT_EQ(float_at(p0, 4), -0.61803398874989484820458683436564); + EXPECT_FLOAT_EQ(float_at(p0, 5), -1.9021130325903071442328786667588); + p0 += 24; + + // For small quantities, absolute error is more reliable than relative + const float ZERO_THRESH = 1.0e-43f; + + // coefficients, channel 1 + EXPECT_NEAR(float_at(p0, 0), 0.0, ZERO_THRESH); + EXPECT_NEAR(float_at(p0, 1), 0.0, ZERO_THRESH); + EXPECT_NEAR(float_at(p0, 2), 0.0, ZERO_THRESH); + EXPECT_NEAR(float_at(p0, 3), 0.0, ZERO_THRESH); + EXPECT_NEAR(float_at(p0, 4), 0.0, ZERO_THRESH); + EXPECT_NEAR(float_at(p0, 5), 0.0, ZERO_THRESH); + p0 += 24; + + EXPECT_NEAR(float_at(p0, 0), 2.0, ZERO_THRESH); + EXPECT_NEAR(float_at(p0, 1), 2.0, ZERO_THRESH); + EXPECT_NEAR(float_at(p0, 2), 2.0, ZERO_THRESH); + p0 += 12; + + EXPECT_NEAR(float_at(p0, 0), 0.0, ZERO_THRESH); + EXPECT_NEAR(float_at(p0, 1), 0.0, ZERO_THRESH); + EXPECT_NEAR(float_at(p0, 2), 0.0, ZERO_THRESH); + p0 += 12; + + // bass/mids/trebs + + EXPECT_NEAR(float_at(p0, 0), 0.0, ZERO_THRESH); + EXPECT_NEAR(float_at(p0, 1), 2.0, ZERO_THRESH); + EXPECT_NEAR(float_at(p0, 2), 4.0, ZERO_THRESH); + p0 += 12; + + EXPECT_NEAR(float_at(p0, 0), 0.0, ZERO_THRESH); + EXPECT_NEAR(float_at(p0, 1), 0.0, ZERO_THRESH); + EXPECT_NEAR(float_at(p0, 2), 0.0, ZERO_THRESH); + p0 += 12; +} + +// Now let's try a more realistic sampling rate +TEST(VisualizationTest, SinesAndCosines) +{ + using namespace std::chrono; + + const float TWO = 2.f; + const float FOUR = 4.f; + + // Everything below is driven off `NUM_SAMP`-- the higher this number is, + // the closer we'll get to a dirac delta function at these functions' + // frequencies. + const size_t NUM_SAMP = /*101*/ /*513*/ 1025; + + const size_t NUM_COEFF = (NUM_SAMP / 2) + 1; + const size_t SAMPLE_RATE_HZ = size_t((float)NUM_SAMP / 6.28318531f) + 1; + + // Just for fun (and better test coverage) we'll represent our waveforms as + // signed 16-bit integers, sampled at ceil(num_samp/2/Pi,) with two channels + // (i.e. stereo). 
+ AudioFormat fmt(SAMPLE_RATE_HZ, SampleFormat::S16, 2); + + // Let's sample over the entire period of these functions (2Pi =~ 6.28) + std::shared_ptr pcache = + std::make_shared(fmt, seconds(7)); + + // Sample the functions over all of [0, 2*Pi), so the DFT has a chance + // to "see" all the frequencies in one period of each function. + int16_t samples[SAMPLE_RATE_HZ * 2]; + // We sample the waveforms one second at a time, filling-up the cache as we + // go: + for (size_t i = 0; i < 7; ++i) { + for (size_t j = 0; j < SAMPLE_RATE_HZ; ++j) { + float x = (float)i + float(j) / (float)SAMPLE_RATE_HZ; + float f = sin(x) + cos(TWO * x) / TWO; + float g = sin(TWO * x) + cos(FOUR * x) / FOUR; + + // -1.5 <= f <= 0.75 (approx), & -1.25 <= g <= 0.75 (approx), so + // -let's scale f & g. + samples[2 * j ] = (int16_t)(f * 1024.f); + samples[2 * j + 1] = (int16_t)(g * 1024.f); + } + pcache->Add(samples, sizeof(samples)); + } + + Visualization::SoundInfoCache::Time t0, t1; + std::tie(t0, t1) = pcache->Range(); + + // Quick sanity check-- `t0` is whatever time the first sample was added; + // what we *do* know is that `t1` should be seven seconds later. + auto d = t1 - t0; + EXPECT_EQ(d, seconds(7)) << "t0 is " << t0 << ", t1 is " << t1 << ", d is " << d; + + // OK-- compute the DFT: + SoundAnalysisParameters params(NUM_SAMP, 0.f, 20000.f); + SoundAnalysis analysis(params, pcache); + + EXPECT_TRUE(analysis.Update(t1)); + fftwf_complex coeffs[2 * NUM_COEFF]; + EXPECT_TRUE(analysis.GetCoeffs(coeffs, sizeof(coeffs))); + + float spectra[2 * NUM_COEFF]; + for (size_t i = 0; i < NUM_COEFF; ++i) { + float mag_left = sqrt(coeffs[i][0] * coeffs[i][0] + coeffs[i][1] * coeffs[i][1]); + spectra[i] = mag_left > 1.0f ? mag_left : 0.f; // threshold + + float mag_right = sqrt(coeffs[NUM_COEFF + i][0] * coeffs[NUM_COEFF + i][0] + + coeffs[NUM_COEFF + i][1] * coeffs[NUM_COEFF + i][1]); + spectra[NUM_COEFF + i] = mag_right > 1.0f ? 
mag_right : 0.f; // threshold + } + + // left: should see frequency at coeff 1 & coeff 2 (half as big as one) + float abs_err = spectra[1] / 50.f; + EXPECT_NEAR(spectra[1], TWO * spectra[2], abs_err); + + float thresh = spectra[1] / 50.f; + for (size_t i = 0; i < NUM_COEFF; ++i) { + if (i != 1 && i != 2) { + EXPECT_TRUE(spectra[i] < thresh) + << "i is " << i << ", threshold is " << thresh << + ", spectra[i] is " << spectra[i]; + } + } + + // right: should see 'em at 2 & 4 (the one at 4 being one-quarter the size) + abs_err = spectra[NUM_COEFF + 2] / 50.f; + EXPECT_NEAR(spectra[NUM_COEFF + 2], FOUR * spectra[NUM_COEFF + 4], abs_err); + thresh = spectra[NUM_COEFF + 2] /50.f; + for (size_t i = 0; i < NUM_COEFF; ++i) { + if (i != 2 && i != 4) { + EXPECT_TRUE(spectra[NUM_COEFF + i] < thresh) + << "i is " << i << ", threshold is " << thresh << + ", spectra[NUM_COEFF + i] is " << spectra[NUM_COEFF + i]; + } + } + +} + +// Network protocol -- deserialization +TEST(VisualizationTest, TestDeCliHlo) +{ + ClientHello clihlo; + uint8_t incomplete_buf_0[] = { 0x00 }; + EXPECT_EQ(ParseResult::NEED_MORE_DATA, + ParseClihlo(incomplete_buf_0, sizeof(incomplete_buf_0), clihlo)); + + // Correct message type, length is zero + uint8_t incomplete_buf_1[] = { 0x00, 0x00, 0x00, 0x00 }; + EXPECT_EQ(ParseResult::NEED_MORE_DATA, + ParseClihlo(incomplete_buf_1, sizeof(incomplete_buf_1), clihlo)); + + // Correct message type, length is correct, payload is incomplete + uint8_t incomplete_buf_2[] = { 0x00, 0x00, 0x00, 0x06, 0x00, 0x01, 0x00, 0x20 }; + EXPECT_EQ(ParseResult::NEED_MORE_DATA, + ParseClihlo(incomplete_buf_2, sizeof(incomplete_buf_2), clihlo)); + + // Correct message type, length is correct, missing "check byte" + uint8_t incomplete_buf_3[] = { + 0x00, 0x00, + 0x00, 0x06, + 0x00, 0x01, + 0x00, 0x20, + 0x00, 0xff + }; + EXPECT_EQ(ParseResult::NEED_MORE_DATA, + ParseClihlo(incomplete_buf_3, sizeof(incomplete_buf_3), clihlo)); + + // Correct message, except the length is incorrect + uint8_t incomplete_buf_4[] = { + 0x00, 0x00, + 0x00, 0x05, + 0x00, 0x01, + 0x00, 0x20, + 0x00, 0xff + }; + EXPECT_EQ(ParseResult::NEED_MORE_DATA, + ParseClihlo(incomplete_buf_4, sizeof(incomplete_buf_4), clihlo)); + + // Finally correct + uint8_t complete_buf_0[] = { + 0x00, 0x00, + 0x00, 0x06, + 0x00, 0x01, + 0x00, 0x20, + 0x00, 0xff, + 0x00 + }; + EXPECT_EQ(ParseResult::OK, + ParseClihlo(complete_buf_0, sizeof(complete_buf_0), clihlo)); + + EXPECT_EQ(clihlo.major_version, 0); + EXPECT_EQ(clihlo.minor_version, 1); + EXPECT_EQ(clihlo.requested_fps, 32); + EXPECT_EQ(clihlo.tau, 255); +} + +// Network protocol -- serialization +TEST(VisualizationTest, TestSerSrvHlo) +{ + using std::byte; + + byte buf[] = { + (byte)0x00, (byte)0x00, // type + (byte)0x00, (byte)0x00, // length + (byte)0x00, (byte)0x00, // payload + (byte)0x00, // check + (byte)0xaa // tombstone + }; + + SerializeSrvhlo((byte)3, (byte)2, buf); + + ASSERT_EQ(buf[0], (byte)0x00); + ASSERT_EQ(buf[1], (byte)0x01); + ASSERT_EQ(buf[2], (byte)0x00); + ASSERT_EQ(buf[3], (byte)0x02); + ASSERT_EQ(buf[4], (byte)0x03); + ASSERT_EQ(buf[5], (byte)0x02); + ASSERT_EQ(buf[6], (byte)0x00); + ASSERT_EQ(buf[7], (byte)0xaa); +} diff --git a/test/meson.build b/test/meson.build index 52833feea7..0840beb9a3 100644 --- a/test/meson.build +++ b/test/meson.build @@ -635,3 +635,38 @@ if alsa_dep.found() endif subdir('fs') + +# +# Visualization Output +# + +test( + 'test_vis', + executable( + 'test_vis', + 'TestVisualization.cxx', + include_directories: inc, + dependencies: [ + output_plugins_dep, + 
gtest_dep, + ], + ), + protocol: 'gtest', +) + +# +# Visualization client +# + +executable( + 'run_vis', + 'run_vis.cxx', + include_directories: inc, + dependencies: [ + output_registry_dep, + encoder_glue_dep, + event_dep, + cmdline_dep, + ], +) + diff --git a/test/run_vis.cxx b/test/run_vis.cxx new file mode 100644 index 0000000000..b7e13d09ad --- /dev/null +++ b/test/run_vis.cxx @@ -0,0 +1,336 @@ +#include "net/SocketAddress.hxx" +#include "net/SocketDescriptor.hxx" +#include "util/ByteOrder.hxx" +#include "util/PrintException.hxx" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +using std::byte; +using std::make_tuple; +using std::span; + +class CliError : public std::runtime_error +{ +public: + CliError(const char *pmsg) : std::runtime_error(pmsg) + { } + CliError(const std::string &msg) : std::runtime_error(msg) + { } +}; + +/// Parse the command line, return our parameters +static std::tuple +ParseCl(int argc, char **argv) +{ + if (5 != argc) { + throw CliError("Four arguments expected"); + } + + uint16_t port = atoi(argv[2]); + if (0 == port) { + throw CliError("Couldn't parse port"); + } + + uint16_t fps = atoi(argv[3]); + if (0 == fps) { + throw CliError("Couldn't parse fps"); + } + + int16_t tau = atoi(argv[4]); + // Arghhh... no way to distinguish between "0" and error + + return std::make_tuple(argv[1], port, fps, tau); +} + +/// Connect to the MPD visualization server +static std::variant +Connect(const std::string &host, uint16_t port) +{ + struct sockaddr_in addr; + addr.sin_family = AF_INET; + addr.sin_port = htons(port); + + if (0 >= inet_aton(host.c_str(), &addr.sin_addr)) { + std::string msg = "Failed to parse '" + host + "' as a hostname (" + + strerror(errno) + ")"; + throw CliError(msg); + } + + SocketAddress sock_addr((const struct sockaddr*)&addr, sizeof(addr)); + + SocketDescriptor sock; + if (!sock.Create(AF_INET, SOCK_STREAM, 0)) { + throw std::runtime_error("Faled to 'Create' the SocketDescriptor."); + } + + if (sock.Connect(sock_addr)) { + return sock; + } + + return std::monostate { }; +} + +static std::tuple +Handshake(SocketDescriptor &sock, uint16_t fps, int16_t tau) +{ + static byte buf[11] = { + byte{0x00}, byte{0x00}, // message type + byte{0x00}, byte{0x06}, // payload length + byte{0x00}, byte{0x01}, // request version 0.1 + }; + + uint16_t fpsn = htons(fps); + int16_t taun = htons(tau); + std::copy((byte*)&fpsn, (byte*)(&fpsn + 2), buf + 6); + std::copy((byte*)&taun, (byte*)(&taun + 2), buf + 8); + buf[10] = byte{0}; + + ssize_t cb = sock.Write(span(buf, buf+ sizeof(buf))); + if (0 >= cb) { + throw std::runtime_error(strerror(errno)); + } + if (cb != sizeof(buf)) { + throw std::runtime_error("Incomplete write."); + } + + cb = sock.Read(span(buf, buf + sizeof(buf))); + if (0 >= cb) { + throw std::runtime_error(strerror(errno)); + } + + byte *p = buf; + uint16_t msgtype = FromBE16(*(uint16_t *)p); p += 2; + if (0x0001 != msgtype) { + throw std::runtime_error("Unexpected message type"); + } + + uint16_t msglen = ntohs(*(uint16_t *)p); p += 2; + if (0x0002 != msglen) { + throw std::runtime_error("Unexpected message length"); + } + + uint8_t proto_ver_major = (uint8_t)*p++; + uint8_t proto_ver_minor = (uint8_t)*p++; + + return make_tuple(proto_ver_major, proto_ver_minor); +} + +/// Listen for FRAME messages, print-out bass/mids/trebs +static void +Listen(SocketDescriptor &sock) +{ + using namespace std; + using namespace std::chrono; + + byte buf[8192]; + + // this will hold 
num_chan * 8 floats for to compute a weighted average of + // recent bass values-- will initialize on first FRAME + vector bass; + // index of the "next" slot for a bass value + size_t bass_idx = 0; + + const float WEIGHTS[] = { 1.67772f, 2.09715f, 2.62144f, 3.2768f, 4.096f, 5.12f, 6.4f, 8.0f }; + + for (size_t i = 0; ; ++i) { + ssize_t cb = sock.Read(span(buf, buf + sizeof(buf))); + if (0 >= cb) { + if (0 == errno) { + cout << "MPD went away." << endl; + return; + } + throw std::runtime_error(strerror(errno)); + } + + std::time_t now; + if ((std::time_t)-1 == std::time(&now)) { + throw std::runtime_error(strerror(errno)); + } + + if (cb == sizeof(buf)) { + throw std::runtime_error("Buffer overflow!") ; + } + + // Hmmm... let's begin parsing (tho I think for now I'll just be + // interested in bass/mids/trebs as a crude manual test). + byte *p = buf; + + uint32_t sentinel = ntohl(*(uint32_t *)p); + p += 4; + if (0x63ac8403 != sentinel) { + throw std::runtime_error("Missing sentinel!"); + } + + uint16_t msg_type = FromBE16(*(uint16_t*)p); p += 2; + if (0x1000 != msg_type) { + stringstream stm; + stm << "Unexpected message type 0x" << hex << msg_type << "!"; + throw std::runtime_error(stm.str()); + } + + uint16_t msg_len = FromBE16(*(uint16_t*)p); p += 2; + uint16_t num_samp = FromBE16(*(uint16_t*)p); p += 2; + uint8_t num_chan = *(uint8_t*)p; p += 1; + /*uint16_t sample_rate = FromBE16(*(uint16_t*)p);*/ p += 2; + + if (0 == bass.size()) { + bass.resize(num_chan * 8, 0.0f); + } + + // Skip over waveforms for now! + p += num_samp * num_chan * 4; + + uint16_t num_freq = FromBE16(*(uint16_t*)p); p += 2; + /*uint32_t tmp = ntohl(*(uint32_t *)p);*/ p += 4; + /*float freq_lo = *(float*)&tmp;*/ + /*tmp = ntohl(*(uint32_t *)p);*/ p += 4; + /*float freq_hi = *(float*)&tmp;*/ + + /*uint16_t freq_off = FromBE16(*(uint16_t*)p);*/ p += 2; + + // Let's skip the Fourier coefficients.... + p += num_chan * num_freq * 8; + // as well as the power spectra + p += num_chan * num_freq * 4; + + auto now_ms = duration_cast(system_clock::now().time_since_epoch()); + cout << put_time(gmtime(&now), "%c %Z") << ": [" << + now_ms.count() << "](" << + msg_len << "bytes) "; + + // OK-- let's just grab bass/mids/trebs for each channel. + float mean_bass = 0.0f, mean_mids = 0.0f, mean_trebs = 0.0f; + for (uint8_t j = 0; j < num_chan; ++j) { + + if (j) { + cout << " "; + } + + uint32_t tmp = ntohl(*(uint32_t *)p); p += 4; + float this_bass = *(float*)&tmp; + tmp = ntohl(*(uint32_t *)p); p += 4; + float this_mids = *(float*)&tmp; + tmp = ntohl(*(uint32_t *)p); p += 4; + float this_trebs = *(float*)&tmp; + + mean_bass += this_bass; + mean_mids += this_mids; + mean_trebs += this_trebs; + + // record the in this channel for use below in beat detection + bass[j * 8 + bass_idx] = this_bass; + + cout << this_bass << "/" << this_mids << "/" << this_trebs; + } + + cout << " "; + + mean_bass /= (float) num_chan; + mean_mids /= (float) num_chan; + mean_trebs /= (float) num_chan; + + // beat detection-- very crude. We'll compute a weighted average of the + // bass in each channel. Note that this caclulation will be incorrect + // for the first seven frames-- meh 🤷 + float weighted_mean_bass = 0.0f; + for (uint8_t j = 0; j < num_chan; ++j) { + + if (j) { + cout << "/"; + } + + // Given the way we're indexing, the weighted sum will come in two + // parts: + + // the first will be bass[bass_idx]*WEIGHTS[7] + ... + bass[0]*WEIGHTS[7-bass_idx] + + // the second will be bass[bass_idx+1]*WEIGHTS[0] + ... 
+ bass[7]*WEIGHTS[6-idx] + // when idx < 7 + + float weighted_mean = 0.0f; + for (ptrdiff_t k = bass_idx, n = 0; k >= 0; --k, ++n) { + weighted_mean += bass[j*8+k] * WEIGHTS[7-n]; + } + if (bass_idx < 7) { + for (size_t k = bass_idx+1, n = 0; k < 8; ++k, ++n) { + weighted_mean += bass[j*8+k] * WEIGHTS[n]; + } + } + + weighted_mean /= 33.2891f; // Sum of weights + + cout << weighted_mean; + + weighted_mean_bass += weighted_mean; + } + + bass_idx = (bass_idx + 1) % 8; + + cout << " "; + + // `weighted_mean_bass` is the average weighted average of the bass across + // all channels-- this is what we use for our signal. + weighted_mean_bass /= (float)num_chan; + + float thresh = weighted_mean_bass * 0.325f; + if ((mean_bass - weighted_mean_bass) > thresh) { + cout << " BEAT DETECTED"; + } + cout << endl; + } +} + +/// Testing client for the visualization output plugin +/// Invoke as `run_vis mpd-host port fps time-offset` +int main(int argc, char **argv) { + using namespace std; + + try { + string mpd_host; + int16_t tau; + uint16_t port, fps; + tie(mpd_host, port, fps, tau) = ParseCl(argc, argv); + + while (true) { + + auto conn = Connect(mpd_host, port); + if (0 == conn.index()) { + cout << "Failed to connect; sleeping for fifteen seconds & retrying (hit Ctrl-C to exit)." << endl; + std::this_thread::sleep_for(15000ms); + continue; + } + + auto sock = std::get(conn); + cout << "Connected." << endl; + + uint8_t major, minor; + tie(major, minor) = Handshake(sock, fps, tau); + cout << "Received protocol version " << (int)major << + "." << (int)minor << "." << endl; + + Listen(sock); + cout << "Sleeping for thirty seconds & retrying (hit Ctrl-C to exit)." << endl; + std::this_thread::sleep_for(30000ms); + } + } catch (const CliError &ex) { + PrintException(ex); + return 2; + } catch (...) { + PrintException(std::current_exception()); + return EXIT_FAILURE; + } + + return EXIT_SUCCESS; +}