From 719fa1d54568db82e80386b0ea8550433aa91e68 Mon Sep 17 00:00:00 2001
From: Claudio Cabral <cabral1349@gmail.com>
Date: Sat, 15 Jan 2022 16:02:31 +0100
Subject: [PATCH] Add 32 bit support for ALSA driver (#811)

* apply changes without whitespace

* remove neon intrinsics and fix indentation

* update float_32 macro and fix misspellings

* check msbits to determine number of bits in alsa driver

* add better error messages and support for SND_PCM_FORMAT_S32_BE

* log when sample format is not equal to bits

Co-authored-by: Claudio Cabral <clca@bang-olufsen.dk>
Co-authored-by: Claudio Cabral <cl@udio.co>
---
 common/memops.c          | 101 +++++++++++++++++++++++++++++++++++++--
 common/memops.h          |   4 ++
 linux/alsa/alsa_driver.c |  70 +++++++++++++++++++++++++++
 3 files changed, 171 insertions(+), 4 deletions(-)

diff --git a/common/memops.c b/common/memops.c
index 725c49e17..b00af0bf9 100644
--- a/common/memops.c
+++ b/common/memops.c
@@ -73,6 +73,7 @@
    So, for now (October 2008) we use 2^(N-1)-1 as the scaling factor.
 */
 
+#define SAMPLE_32BIT_SCALING  2147483647.0
 #define SAMPLE_24BIT_SCALING  8388607.0f
 #define SAMPLE_16BIT_SCALING  32767.0f
 
@@ -81,6 +82,11 @@
    advice from Fons Adriaensen: make the limits symmetrical
  */
 
+#define SAMPLE_32BIT_MAX   2147483647
+#define SAMPLE_32BIT_MIN   -2147483647
+#define SAMPLE_32BIT_MAX_D  2147483647.0
+#define SAMPLE_32BIT_MIN_D  -2147483647.0
+
 #define SAMPLE_24BIT_MAX  8388607  
 #define SAMPLE_24BIT_MIN  -8388607 
 #define SAMPLE_24BIT_MAX_F  8388607.0f  
@@ -106,6 +112,7 @@
 */
 
 #define f_round(f) lrintf(f)
+#define d_round(f) lrint(f)
 
 #define float_16(s, d)\
 	if ((s) <= NORMALIZED_FLOAT_MIN) {\
@@ -146,6 +153,15 @@
 		(d) = f_round ((s) * SAMPLE_24BIT_SCALING);                    \
 	}
 
+/* clip s to [NORMALIZED_FLOAT_MIN, NORMALIZED_FLOAT_MAX], scale it by
+   SAMPLE_32BIT_MAX_D in double precision and store the rounded value in d */
+#define float_32(s, d) \
+	do { \
+		const double f32_floor_ = fmax((double)(s), NORMALIZED_FLOAT_MIN); \
+		const double f32_full_ = fmin(NORMALIZED_FLOAT_MAX, f32_floor_) * SAMPLE_32BIT_MAX_D; \
+		(d) = d_round(f32_full_); \
+	} while (0)
+
 /* call this when "s" has already been scaled (e.g. when dithering)
  */
 
@@ -195,6 +211,11 @@ static inline __m128 clip(__m128 s, __m128 min, __m128 max)
     return _mm_min_ps(max, _mm_max_ps(s, min));
 }
 
+static inline __m128d clip_double(__m128d s, __m128d min, __m128d max)
+{   /* __m128d counterpart of clip(): packed max of s with min, then packed min with max */
+    return _mm_min_pd(max, _mm_max_pd(s, min));
+}
+
 static inline __m128i float_24_sse(__m128 s)
 {
     const __m128 upper_bound = gen_one(); /* NORMALIZED_FLOAT_MAX */
@@ -274,13 +295,14 @@ void sample_move_dS_floatLE (char *dst, jack_default_audio_sample_t *src, unsign
    
    S      - sample is a jack_default_audio_sample_t, currently (October 2008) a 32 bit floating point value
    Ss     - like S but reverse endian from the host CPU
-   32u24  - sample is an signed 32 bit integer value, but data is in upper 24 bits only
+   32     - sample is a signed 32 bit integer value
+   32u24  - sample is a signed 32 bit integer value, but data is in upper 24 bits only
    32u24s - like 32u24 but reverse endian from the host CPU
-   32l24  - sample is an signed 32 bit integer value, but data is in lower 24 bits only
+   32l24  - sample is a signed 32 bit integer value, but data is in lower 24 bits only
    32l24s - like 32l24 but reverse endian from the host CPU
-   24     - sample is an signed 24 bit integer value
+   24     - sample is a signed 24 bit integer value
    24s    - like 24 but reverse endian from the host CPU
-   16     - sample is an signed 16 bit integer value
+   16     - sample is a signed 16 bit integer value
    16s    - like 16 but reverse endian from the host CPU
 
    For obvious reasons, the reverse endian versions only show as source types.
@@ -290,6 +312,36 @@ void sample_move_dS_floatLE (char *dst, jack_default_audio_sample_t *src, unsign
 
 /* functions for native integer sample data */
 
+/* Convert nsamples floats from src into signed 32 bit integers whose bytes are
+   stored in the reverse of host order, dst_skip bytes apart; state is unused. */
+void sample_move_d32_sSs (char *dst, jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state)
+{
+	for (; nsamples > 0; nsamples--, src++, dst += dst_skip) {
+		int32_t sample;
+		float_32(*src, sample);
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+		dst[0]=(char)(sample>>24);	/* most significant byte first */
+		dst[1]=(char)(sample>>16);
+		dst[2]=(char)(sample>>8);
+		dst[3]=(char)(sample);
+#elif __BYTE_ORDER == __BIG_ENDIAN
+		dst[0]=(char)(sample);		/* least significant byte first */
+		dst[1]=(char)(sample>>8);
+		dst[2]=(char)(sample>>16);
+		dst[3]=(char)(sample>>24);
+#endif
+	}
+}
+
+/* Convert nsamples floats from src into host-order signed 32 bit integers
+   written dst_skip bytes apart starting at dst; state is unused. */
+void sample_move_d32_sS (char *dst, jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state)
+{
+	for (; nsamples > 0; nsamples--, src++, dst += dst_skip) {
+		float_32(*src, *(int32_t *)dst);
+	}
+}
+
 void sample_move_d32u24_sSs (char *dst, jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state)
 {
 #if defined (__ARM_NEON__) || defined (__ARM_NEON)
@@ -689,6 +741,35 @@ void sample_move_d32l24_sS (char *dst, jack_default_audio_sample_t *src, unsigne
 #endif
 }	
 
+/* Convert nsamples reverse-endian signed 32 bit integers, read from src in
+   steps of src_skip bytes, into float samples stored consecutively at dst. */
+void sample_move_dS_s32s (jack_default_audio_sample_t *dst, char *src, unsigned long nsamples, unsigned long src_skip)
+{
+	/* double factor, as in sample_move_dS_s32, so both variants scale alike;
+	   a float factor would carry out the whole multiply in single precision */
+	const double scaling = 1.0 / SAMPLE_32BIT_SCALING;
+	while (nsamples--) {
+		/* build the word unsigned: shifting a set bit into an int32_t sign bit is undefined */
+		uint32_t x;
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+		x = ((uint32_t)(unsigned char)(src[0]) << 24)
+		  | ((uint32_t)(unsigned char)(src[1]) << 16)
+		  | ((uint32_t)(unsigned char)(src[2]) << 8)
+		  |  (uint32_t)(unsigned char)(src[3]);
+#elif __BYTE_ORDER == __BIG_ENDIAN
+		x = ((uint32_t)(unsigned char)(src[3]) << 24)
+		  | ((uint32_t)(unsigned char)(src[2]) << 16)
+		  | ((uint32_t)(unsigned char)(src[1]) << 8)
+		  |  (uint32_t)(unsigned char)(src[0]);
+#endif
+		/* reinterpret the assembled bits as a signed sample before scaling */
+		double extended = (int32_t)x * scaling;
+		*dst = (float)extended;
+		dst++;
+		src += src_skip;
+	}
+}
+
 void sample_move_dS_s32l24s (jack_default_audio_sample_t *dst, char *src, unsigned long nsamples, unsigned long src_skip)
 {
 #if defined (__ARM_NEON__) || defined (__ARM_NEON)
@@ -753,6 +834,18 @@ void sample_move_dS_s32l24s (jack_default_audio_sample_t *dst, char *src, unsign
 	}
 }	
 
+/* Convert nsamples host-order signed 32 bit integers, spaced src_skip bytes
+   apart in src, into float samples at dst, scaling by 1/SAMPLE_32BIT_SCALING. */
+void sample_move_dS_s32 (jack_default_audio_sample_t *dst, char *src, unsigned long nsamples, unsigned long src_skip)
+{
+	const double inv_full_scale = 1.0 / SAMPLE_32BIT_SCALING;
+	for (; nsamples > 0; nsamples--, src += src_skip) {
+		const int32_t raw = *(int32_t *)src;
+		const double normalized = raw * inv_full_scale;
+		*dst++ = (float)normalized;
+	}
+}
+
 void sample_move_dS_s32l24 (jack_default_audio_sample_t *dst, char *src, unsigned long nsamples, unsigned long src_skip)
 {
 #if defined (__SSE2__) && !defined (__sun__)
diff --git a/common/memops.h b/common/memops.h
index a69087ff4..edc99726e 100644
--- a/common/memops.h
+++ b/common/memops.h
@@ -53,6 +53,8 @@ void sample_move_floatLE_sSs (jack_default_audio_sample_t *dst, char *src, unsig
 void sample_move_dS_floatLE (char *dst, jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state);
 
 /* integer functions */
+void sample_move_d32_sSs             (char *dst, jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state);
+void sample_move_d32_sS              (char *dst, jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state);
 void sample_move_d32u24_sSs          (char *dst, jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state);
 void sample_move_d32u24_sS           (char *dst, jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state);
 void sample_move_d32l24_sSs          (char *dst, jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state);
@@ -81,6 +83,8 @@ void sample_move_dither_tri_d16_sS        (char *dst, jack_default_audio_sample_
 void sample_move_dither_shaped_d16_sSs    (char *dst, jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state);
 void sample_move_dither_shaped_d16_sS     (char *dst, jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state);
 
+void sample_move_dS_s32s             (jack_default_audio_sample_t *dst, char *src, unsigned long nsamples, unsigned long src_skip);
+void sample_move_dS_s32              (jack_default_audio_sample_t *dst, char *src, unsigned long nsamples, unsigned long src_skip);
 void sample_move_dS_s32u24s          (jack_default_audio_sample_t *dst, char *src, unsigned long nsamples, unsigned long src_skip);
 void sample_move_dS_s32u24           (jack_default_audio_sample_t *dst, char *src, unsigned long nsamples, unsigned long src_skip);
 void sample_move_dS_s32l24s          (jack_default_audio_sample_t *dst, char *src, unsigned long nsamples, unsigned long src_skip);
diff --git a/linux/alsa/alsa_driver.c b/linux/alsa/alsa_driver.c
index f14c88844..bd51ec4b0 100644
--- a/linux/alsa/alsa_driver.c
+++ b/linux/alsa/alsa_driver.c
@@ -314,10 +314,45 @@ alsa_driver_setup_io_function_pointers (alsa_driver_t *driver)
 				break;
 
 			case 4: /* NO DITHER */
+				switch (driver->playback_sample_format)
+				{
+					case SND_PCM_FORMAT_S24_LE:
+					case SND_PCM_FORMAT_S24_BE:
 				driver->write_via_copy = driver->quirk_bswap?
 					sample_move_d32l24_sSs:
 					sample_move_d32l24_sS;
 				break;
+					case SND_PCM_FORMAT_S32_LE:
+					case SND_PCM_FORMAT_S32_BE:
+					{
+						int bits = snd_pcm_hw_params_get_sbits(driver->playback_hw_params);
+						if (bits == 32)
+						{
+							driver->write_via_copy = driver->quirk_bswap?
+								sample_move_d32_sSs:
+								sample_move_d32_sS;
+						}
+						else if (bits == 24)
+						{
+							jack_log("sample format is SND_PCM_FORMAT_S32 but only 24 bits available");
+							driver->write_via_copy = driver->quirk_bswap?
+								sample_move_d32u24_sSs:
+								sample_move_d32u24_sS;
+						}
+						else
+						{
+							jack_error("unsupported sample format for playback: "
+									   "SND_PCM_FORMAT_S32 with %d bits",
+									   bits);
+							exit (1);
+						}
+						break;
+					}
+					default:
+					jack_error("unsupported 4 byte sample_format");
+					exit (1);
+				}
+				break;
 
 			default:
 				jack_error ("impossible sample width (%d) discovered!",
@@ -343,10 +378,45 @@ alsa_driver_setup_io_function_pointers (alsa_driver_t *driver)
 					sample_move_dS_s24;
 				break;
 			case 4:
+				switch (driver->capture_sample_format)
+				{
+					case SND_PCM_FORMAT_S24_LE:
+					case SND_PCM_FORMAT_S24_BE:
 				driver->read_via_copy = driver->quirk_bswap?
 					sample_move_dS_s32l24s:
 					sample_move_dS_s32l24;
 				break;
+					case SND_PCM_FORMAT_S32_LE:
+					case SND_PCM_FORMAT_S32_BE:
+					{
+						int bits = snd_pcm_hw_params_get_sbits(driver->capture_hw_params);
+						if (bits == 32)
+						{
+							driver->read_via_copy = driver->quirk_bswap?
+								sample_move_dS_s32s:
+								sample_move_dS_s32;
+						}
+						else if(bits == 24)
+						{
+							jack_log("sample format is SND_PCM_FORMAT_S32 but only 24 bits available");
+							driver->read_via_copy = driver->quirk_bswap?
+								sample_move_dS_s32u24s:
+								sample_move_dS_s32u24;
+						}
+						else
+						{
+							jack_error("unsupported sample format for capture: "
+									   "SND_PCM_FORMAT_S32 with %d bits",
+									   bits);
+							exit (1);
+						}
+						break;
+					}
+					default:
+                        jack_error("unsupported 4 byte sample_format");
+                        exit (1);
+                }
+				break;
 			}
 		}
 	}