diff --git a/include/libbase64.h b/include/libbase64.h index 16c8ef0e..c7ec0b2c 100644 --- a/include/libbase64.h +++ b/include/libbase64.h @@ -1,6 +1,8 @@ #ifndef _LIBBASE64_H #define _LIBBASE64_H +#include <stdint.h> + #ifdef __cplusplus extern "C" { #endif @@ -80,6 +82,14 @@ int base64_decode , int flags ) ; +int base64_decode16 + ( const uint16_t *src + , size_t srclen + , char *out + , size_t *outlen + , int flags + ) ; + /* Call this before calling base64_stream_decode() to init the state. See above * for `flags`; set to 0 for default operation: */ void base64_stream_decode_init @@ -103,6 +113,14 @@ int base64_stream_decode , size_t *outlen ) ; +int base64_stream_decode16 + ( struct base64_state *state + , const uint16_t *src + , size_t srclen + , char *out + , size_t *outlen + ) ; + #ifdef __cplusplus } #endif diff --git a/lib/arch/avx/codec.c b/lib/arch/avx/codec.c index d1a6164f..04e8b64e 100644 --- a/lib/arch/avx/codec.c +++ b/lib/arch/avx/codec.c @@ -15,6 +15,8 @@ #include "../ssse3/enc_translate.c" #include "../sse41/enc_reshuffle.c" +#include "../generic/convert.c" + #endif // __AVX__ BASE64_ENC_FUNCTION(avx) @@ -28,6 +30,9 @@ BASE64_ENC_FUNCTION(avx) #endif } +#define STRING_TYPE uint8_t +#define CHAR_CONVERT(x) (x) +#define LOAD_STRING(c) LOAD_STRING8(c) BASE64_DEC_FUNCTION(avx) { #ifdef __AVX__ @@ -38,3 +43,33 @@ BASE64_DEC_FUNCTION(avx) BASE64_DEC_STUB #endif } +#undef LOAD_STRING +#undef CHAR_CONVERT +#undef STRING_TYPE + +#define STRING_TYPE uint16_t +#define CHAR_CONVERT(x) convert(x) +#define LOAD_STRING(c) LOAD_STRING16(c) +BASE64_DEC16_FUNCTION(avx) +{ +#ifdef __AVX__ +#include "../generic/dec_head.c" +#include "../sse42/dec_loop.c" +#include "../generic/dec_tail.c" +#else + BASE64_DEC_STUB +#endif +} +#undef LOAD_STRING +#undef CHAR_CONVERT +#undef STRING_TYPE + +BASE64_CVT_FUNCTION(avx) +{ +#ifdef __AVX__ + #include "../sse2/convert_loop.c" + #include "../generic/convert_loop.c" +#else + BASE64_CVT_STUB +#endif +} diff --git a/lib/arch/avx2/codec.c 
b/lib/arch/avx2/codec.c index 114a2bb9..20448549 100644 --- a/lib/arch/avx2/codec.c +++ b/lib/arch/avx2/codec.c @@ -130,6 +130,8 @@ dec_reshuffle (__m256i in) 0, 1, 2, 4, 5, 6, -1, -1)); } +#include "../generic/convert.c" + #endif // __AVX2__ BASE64_ENC_FUNCTION(avx2) @@ -143,6 +145,9 @@ BASE64_ENC_FUNCTION(avx2) #endif } +#define STRING_TYPE uint8_t +#define CHAR_CONVERT(x) (x) +#define LOAD_STRING(c) _mm256_loadu_si256((__m256i *)c) BASE64_DEC_FUNCTION(avx2) { #ifdef __AVX2__ @@ -153,3 +158,33 @@ BASE64_DEC_FUNCTION(avx2) BASE64_DEC_STUB #endif } +#undef LOAD_STRING +#undef CHAR_CONVERT +#undef STRING_TYPE + +#define STRING_TYPE uint16_t +#define CHAR_CONVERT(x) convert(x) +#define LOAD_STRING(c) _mm256_permute4x64_epi64(_mm256_packus_epi16( _mm256_loadu_si256((__m256i *)(c+0)),_mm256_loadu_si256((__m256i *)(c+16))), 0xD8) +BASE64_DEC16_FUNCTION(avx2) +{ +#ifdef __AVX2__ +#include "../generic/dec_head.c" +#include "dec_loop.c" +#include "../generic/dec_tail.c" +#else + BASE64_DEC_STUB +#endif +} +#undef LOAD_STRING +#undef CHAR_CONVERT +#undef STRING_TYPE + +BASE64_CVT_FUNCTION(avx2) +{ +#ifdef __AVX2__ + #include "../sse2/convert_loop.c" + #include "../generic/convert_loop.c" +#else + BASE64_CVT_STUB +#endif +} diff --git a/lib/arch/avx2/dec_loop.c b/lib/arch/avx2/dec_loop.c index 8344004c..5765ef03 100644 --- a/lib/arch/avx2/dec_loop.c +++ b/lib/arch/avx2/dec_loop.c @@ -6,7 +6,7 @@ while (srclen >= 45) { // Load string: - __m256i str = _mm256_loadu_si256((__m256i *)c); + __m256i str = LOAD_STRING(c); // The input consists of six character sets in the Base64 alphabet, // which we need to map back to the 6-bit values they represent. 
diff --git a/lib/arch/generic/codec.c b/lib/arch/generic/codec.c index ae54a8b8..4b3464a1 100644 --- a/lib/arch/generic/codec.c +++ b/lib/arch/generic/codec.c @@ -5,6 +5,8 @@ #include "../../../include/libbase64.h" #include "../../codecs.h" +#include "../generic/convert.c" + BASE64_ENC_FUNCTION(plain) { #include "enc_head.c" @@ -16,6 +18,8 @@ BASE64_ENC_FUNCTION(plain) #include "enc_tail.c" } +#define STRING_TYPE uint8_t +#define CHAR_CONVERT(x) (x) BASE64_DEC_FUNCTION(plain) { #include "dec_head.c" @@ -26,3 +30,20 @@ BASE64_DEC_FUNCTION(plain) #endif #include "dec_tail.c" } +#undef CHAR_CONVERT +#undef STRING_TYPE + +#define STRING_TYPE uint16_t +#define CHAR_CONVERT(x) convert(x) +BASE64_DEC16_FUNCTION(plain) +{ +#include "dec_head.c" +#include "dec_tail.c" +} +#undef CHAR_CONVERT +#undef STRING_TYPE + +BASE64_CVT_FUNCTION(plain) +{ + #include "convert_loop.c" +} diff --git a/lib/arch/generic/convert.c b/lib/arch/generic/convert.c new file mode 100644 index 00000000..7400b7d9 --- /dev/null +++ b/lib/arch/generic/convert.c @@ -0,0 +1,13 @@ +static inline uint8_t +convert (const uint16_t in) +{ + unsigned value = in; +#if 0 + unsigned mask = (unsigned)((0 - (int)(value >> 8)) >> (sizeof(int) * 8U - 1U)); + + value |= mask; + return (uint8_t)value; +#else + return (value > 255U) ? 
255U : value; +#endif +} diff --git a/lib/arch/generic/convert_loop.c b/lib/arch/generic/convert_loop.c new file mode 100644 index 00000000..b606ebd1 --- /dev/null +++ b/lib/arch/generic/convert_loop.c @@ -0,0 +1,4 @@ +while (len > 0U) { + *dst++ = (char)convert(*src++); + len--; +} diff --git a/lib/arch/generic/dec_head.c b/lib/arch/generic/dec_head.c index 4aedcd3f..425fcdb2 100644 --- a/lib/arch/generic/dec_head.c +++ b/lib/arch/generic/dec_head.c @@ -1,5 +1,5 @@ int ret = 0; -const uint8_t *c = (const uint8_t *)src; +const STRING_TYPE *c = (const STRING_TYPE *)src; uint8_t *o = (uint8_t *)out; uint8_t q; @@ -17,7 +17,7 @@ if (st.eof) { // If there was a trailing '=' to check, check it: if (srclen && (st.eof == BASE64_AEOF)) { st.eof = BASE64_EOF; - ret = (base64_table_dec[*c++] == 254) ? 1 : 0; + ret = (base64_table_dec[CHAR_CONVERT(*c++)] == 254) ? 1 : 0; } return ret; } diff --git a/lib/arch/generic/dec_tail.c b/lib/arch/generic/dec_tail.c index be40fedf..03a9c130 100644 --- a/lib/arch/generic/dec_tail.c +++ b/lib/arch/generic/dec_tail.c @@ -2,7 +2,7 @@ ret = 1; break; } - if ((q = base64_table_dec[*c++]) >= 254) { + if ((q = base64_table_dec[CHAR_CONVERT(*c++)]) >= 254) { st.eof = BASE64_EOF; // Treat character '=' as invalid for byte 0: break; @@ -14,7 +14,7 @@ ret = 1; break; } - if ((q = base64_table_dec[*c++]) >= 254) { + if ((q = base64_table_dec[CHAR_CONVERT(*c++)]) >= 254) { st.eof = BASE64_EOF; // Treat character '=' as invalid for byte 1: break; @@ -28,14 +28,14 @@ ret = 1; break; } - if ((q = base64_table_dec[*c++]) >= 254) { + if ((q = base64_table_dec[CHAR_CONVERT(*c++)]) >= 254) { // When q == 254, the input char is '='. // Check if next byte is also '=': if (q == 254) { if (srclen-- != 0) { // EOF: st.eof = BASE64_EOF; - ret = (base64_table_dec[*c++] == 254) ? 1 : 0; + ret = (base64_table_dec[CHAR_CONVERT(*c++)] == 254) ? 
1 : 0; break; } else { @@ -58,7 +58,7 @@ ret = 1; break; } - if ((q = base64_table_dec[*c++]) >= 254) { + if ((q = base64_table_dec[CHAR_CONVERT(*c++)]) >= 254) { st.eof = BASE64_EOF; // When q == 254, the input char is '='. Return 1 and EOF. // When q == 255, the input char is invalid. Return 0 and EOF. diff --git a/lib/arch/neon32/codec.c b/lib/arch/neon32/codec.c index e7288fcc..25f0a579 100644 --- a/lib/arch/neon32/codec.c +++ b/lib/arch/neon32/codec.c @@ -111,6 +111,8 @@ enc_translate (uint8x16x4_t in) return out; } +#include "../generic/convert.c" + #endif // Stride size is so large on these NEON 32-bit functions @@ -140,3 +142,13 @@ BASE64_DEC_FUNCTION(neon32) BASE64_DEC_STUB #endif } + + +BASE64_CVT_FUNCTION(neon32) +{ +#if (defined(__arm__) && defined(__ARM_NEON__)) + #include "../generic/convert_loop.c" +#else + BASE64_CVT_STUB +#endif +} diff --git a/lib/arch/neon64/codec.c b/lib/arch/neon64/codec.c index c101df7c..cece1dbf 100644 --- a/lib/arch/neon64/codec.c +++ b/lib/arch/neon64/codec.c @@ -41,6 +41,9 @@ static const char *base64_table_enc_transposed = "Oeu+" "Pfv/" }; + +#include "../generic/convert.c" + #endif // Stride size is so large on these NEON 64-bit functions @@ -73,3 +76,12 @@ BASE64_DEC_FUNCTION(neon64) BASE64_DEC_STUB #endif } + +BASE64_CVT_FUNCTION(neon64) +{ +#if (defined(__aarch64__) && defined(__ARM_NEON__)) + #include "../generic/convert_loop.c" +#else + BASE64_CVT_STUB +#endif +} diff --git a/lib/arch/sse2/compare_macros.h b/lib/arch/sse2/compare_macros.h index 76d92887..d06e1218 100644 --- a/lib/arch/sse2/compare_macros.h +++ b/lib/arch/sse2/compare_macros.h @@ -2,3 +2,5 @@ #define CMPEQ(s,n) _mm_cmpeq_epi8((s), _mm_set1_epi8(n)) #define REPLACE(s,n) _mm_and_si128((s), _mm_set1_epi8(n)) #define RANGE(s,a,b) _mm_andnot_si128(CMPGT((s), (b)), CMPGT((s), (a) - 1)) +#define LOAD_STRING8(c) _mm_loadu_si128((__m128i *)c) +#define LOAD_STRING16(c) _mm_packus_epi16(_mm_loadu_si128((__m128i *)(c + 0)), _mm_loadu_si128((__m128i *)(c + 8))) 
diff --git a/lib/arch/sse2/convert_loop.c b/lib/arch/sse2/convert_loop.c new file mode 100644 index 00000000..f9c2537d --- /dev/null +++ b/lib/arch/sse2/convert_loop.c @@ -0,0 +1,53 @@ +while (len >= 64U) { + __m128i src0 = _mm_loadu_si128((const __m128i*)(src + 0)); + __m128i src1 = _mm_loadu_si128((const __m128i*)(src + 8)); + __m128i src2 = _mm_loadu_si128((const __m128i*)(src + 16)); + __m128i src3 = _mm_loadu_si128((const __m128i*)(src + 24)); + __m128i src4 = _mm_loadu_si128((const __m128i*)(src + 32)); + __m128i src5 = _mm_loadu_si128((const __m128i*)(src + 40)); + __m128i src6 = _mm_loadu_si128((const __m128i*)(src + 48)); + __m128i src7 = _mm_loadu_si128((const __m128i*)(src + 56)); + + + __m128i dst0 = _mm_packus_epi16(src0, src1); + __m128i dst1 = _mm_packus_epi16(src2, src3); + __m128i dst2 = _mm_packus_epi16(src4, src5); + __m128i dst3 = _mm_packus_epi16(src6, src7); + + _mm_storeu_si128((__m128i*)(dst + 0), dst0); + _mm_storeu_si128((__m128i*)(dst + 16), dst1); + _mm_storeu_si128((__m128i*)(dst + 32), dst2); + _mm_storeu_si128((__m128i*)(dst + 48), dst3); + + len-= 64U; + src += 64U; + dst += 64U; +} +if (len & 32U) { + __m128i src0 = _mm_loadu_si128((const __m128i*)(src + 0)); + __m128i src1 = _mm_loadu_si128((const __m128i*)(src + 8)); + __m128i src2 = _mm_loadu_si128((const __m128i*)(src + 16)); + __m128i src3 = _mm_loadu_si128((const __m128i*)(src + 24)); + + __m128i dst0 = _mm_packus_epi16(src0, src1); + __m128i dst1 = _mm_packus_epi16(src2, src3); + + _mm_storeu_si128((__m128i*)(dst + 0), dst0); + _mm_storeu_si128((__m128i*)(dst + 16), dst1); + + len-= 32U; + src += 32U; + dst += 32U; +} +if (len & 16U) { + __m128i src0 = _mm_loadu_si128((const __m128i*)(src + 0)); + __m128i src1 = _mm_loadu_si128((const __m128i*)(src + 8)); + + __m128i dst0 = _mm_packus_epi16(src0, src1); + + _mm_storeu_si128((__m128i*)(dst + 0), dst0); + + len-= 16U; + src += 16U; + dst += 16U; +} diff --git a/lib/arch/sse41/codec.c b/lib/arch/sse41/codec.c index 
d9e33b5e..20526072 100644 --- a/lib/arch/sse41/codec.c +++ b/lib/arch/sse41/codec.c @@ -15,6 +15,8 @@ #include "../ssse3/enc_translate.c" #include "enc_reshuffle.c" +#include "../generic/convert.c" + #endif // __SSE4_1__ BASE64_ENC_FUNCTION(sse41) @@ -28,6 +30,9 @@ BASE64_ENC_FUNCTION(sse41) #endif } +#define STRING_TYPE uint8_t +#define CHAR_CONVERT(x) (x) +#define LOAD_STRING(c) LOAD_STRING8(c) BASE64_DEC_FUNCTION(sse41) { #ifdef __SSE4_1__ @@ -38,3 +43,33 @@ BASE64_DEC_FUNCTION(sse41) BASE64_DEC_STUB #endif } +#undef LOAD_STRING +#undef CHAR_CONVERT +#undef STRING_TYPE + +#define STRING_TYPE uint16_t +#define CHAR_CONVERT(x) convert(x) +#define LOAD_STRING(c) LOAD_STRING16(c) +BASE64_DEC16_FUNCTION(sse41) +{ +#ifdef __SSE4_1__ +#include "../generic/dec_head.c" +#include "../ssse3/dec_loop.c" +#include "../generic/dec_tail.c" +#else + BASE64_DEC_STUB +#endif +} +#undef LOAD_STRING +#undef CHAR_CONVERT +#undef STRING_TYPE + +BASE64_CVT_FUNCTION(sse41) +{ +#ifdef __SSE4_1__ + #include "../sse2/convert_loop.c" + #include "../generic/convert_loop.c" +#else + BASE64_CVT_STUB +#endif +} diff --git a/lib/arch/sse42/codec.c b/lib/arch/sse42/codec.c index e9c1e6ca..c1671739 100644 --- a/lib/arch/sse42/codec.c +++ b/lib/arch/sse42/codec.c @@ -15,6 +15,8 @@ #include "../ssse3/enc_translate.c" #include "../sse41/enc_reshuffle.c" +#include "../generic/convert.c" + #endif // __SSE4_2__ BASE64_ENC_FUNCTION(sse42) @@ -28,6 +30,9 @@ BASE64_ENC_FUNCTION(sse42) #endif } +#define STRING_TYPE uint8_t +#define CHAR_CONVERT(x) (x) +#define LOAD_STRING(c) LOAD_STRING8(c) BASE64_DEC_FUNCTION(sse42) { #ifdef __SSE4_2__ @@ -38,3 +43,33 @@ BASE64_DEC_FUNCTION(sse42) BASE64_DEC_STUB #endif } +#undef LOAD_STRING +#undef CHAR_CONVERT +#undef STRING_TYPE + +#define STRING_TYPE uint16_t +#define CHAR_CONVERT(x) convert(x) +#define LOAD_STRING(c) LOAD_STRING16(c) +BASE64_DEC16_FUNCTION(sse42) +{ +#ifdef __SSE4_2__ +#include "../generic/dec_head.c" +#include "dec_loop.c" +#include 
"../generic/dec_tail.c" +#else + BASE64_DEC_STUB +#endif +} +#undef LOAD_STRING +#undef CHAR_CONVERT +#undef STRING_TYPE + +BASE64_CVT_FUNCTION(sse42) +{ +#ifdef __SSE4_2__ + #include "../sse2/convert_loop.c" + #include "../generic/convert_loop.c" +#else + BASE64_CVT_STUB +#endif +} diff --git a/lib/arch/sse42/dec_loop.c b/lib/arch/sse42/dec_loop.c index f7e55dcf..a7106ad3 100644 --- a/lib/arch/sse42/dec_loop.c +++ b/lib/arch/sse42/dec_loop.c @@ -6,7 +6,7 @@ while (srclen >= 24) { // Load string: - __m128i str = _mm_loadu_si128((__m128i *)c); + __m128i str = LOAD_STRING(c); // The input consists of six character sets in the Base64 alphabet, // which we need to map back to the 6-bit values they represent. diff --git a/lib/arch/ssse3/codec.c b/lib/arch/ssse3/codec.c index b00cb0af..dd2adbca 100644 --- a/lib/arch/ssse3/codec.c +++ b/lib/arch/ssse3/codec.c @@ -49,6 +49,8 @@ enc_reshuffle (__m128i in) return _mm_bswap_epi32(indices); } +#include "../generic/convert.c" + #endif // __SSSE3__ BASE64_ENC_FUNCTION(ssse3) @@ -62,6 +64,9 @@ BASE64_ENC_FUNCTION(ssse3) #endif } +#define STRING_TYPE uint8_t +#define CHAR_CONVERT(x) (x) +#define LOAD_STRING(c) LOAD_STRING8(c) BASE64_DEC_FUNCTION(ssse3) { #ifdef __SSSE3__ @@ -72,3 +77,33 @@ BASE64_DEC_FUNCTION(ssse3) BASE64_DEC_STUB #endif } +#undef LOAD_STRING +#undef CHAR_CONVERT +#undef STRING_TYPE + +#define STRING_TYPE uint16_t +#define CHAR_CONVERT(x) convert(x) +#define LOAD_STRING(c) LOAD_STRING16(c) +BASE64_DEC16_FUNCTION(ssse3) +{ +#ifdef __SSSE3__ +#include "../generic/dec_head.c" +#include "dec_loop.c" +#include "../generic/dec_tail.c" +#else + BASE64_DEC_STUB +#endif +} +#undef LOAD_STRING +#undef CHAR_CONVERT +#undef STRING_TYPE + +BASE64_CVT_FUNCTION(ssse3) +{ +#ifdef __SSSE3__ + #include "../sse2/convert_loop.c" + #include "../generic/convert_loop.c" +#else + BASE64_CVT_STUB +#endif +} diff --git a/lib/arch/ssse3/dec_loop.c b/lib/arch/ssse3/dec_loop.c index e660d4c6..d7a83409 100644 --- a/lib/arch/ssse3/dec_loop.c 
+++ b/lib/arch/ssse3/dec_loop.c @@ -6,7 +6,7 @@ while (srclen >= 24) { // Load string: - __m128i str = _mm_loadu_si128((__m128i *)c); + __m128i str = LOAD_STRING(c); // The input consists of six character sets in the Base64 alphabet, // which we need to map back to the 6-bit values they represent. diff --git a/lib/codec_choose.c b/lib/codec_choose.c index 0180aa5f..5089380f 100644 --- a/lib/codec_choose.c +++ b/lib/codec_choose.c @@ -66,15 +66,22 @@ #define BASE64_CODEC_FUNCS(arch) \ BASE64_ENC_FUNCTION(arch); \ BASE64_DEC_FUNCTION(arch); \ + BASE64_CVT_FUNCTION(arch); BASE64_CODEC_FUNCS(avx2) +BASE64_DEC16_FUNCTION(avx2); BASE64_CODEC_FUNCS(neon32) BASE64_CODEC_FUNCS(neon64) BASE64_CODEC_FUNCS(plain) +BASE64_DEC16_FUNCTION(plain); BASE64_CODEC_FUNCS(ssse3) +BASE64_DEC16_FUNCTION(ssse3); BASE64_CODEC_FUNCS(sse41) +BASE64_DEC16_FUNCTION(sse41); BASE64_CODEC_FUNCS(sse42) +BASE64_DEC16_FUNCTION(sse42); BASE64_CODEC_FUNCS(avx) +BASE64_DEC16_FUNCTION(avx); static bool codec_choose_forced (struct codec *codec, int flags) @@ -86,44 +93,59 @@ codec_choose_forced (struct codec *codec, int flags) if (!(flags & 0xFF)) { return false; } + codec->dec16 = NULL; if (flags & BASE64_FORCE_AVX2) { codec->enc = base64_stream_encode_avx2; codec->dec = base64_stream_decode_avx2; + codec->dec16 = base64_stream_decode16_avx2; + codec->cvt = base64_cvt_avx2; return true; } if (flags & BASE64_FORCE_NEON32) { codec->enc = base64_stream_encode_neon32; codec->dec = base64_stream_decode_neon32; + codec->cvt = base64_cvt_neon32; return true; } if (flags & BASE64_FORCE_NEON64) { codec->enc = base64_stream_encode_neon64; codec->dec = base64_stream_decode_neon64; + codec->cvt = base64_cvt_neon64; return true; } if (flags & BASE64_FORCE_PLAIN) { codec->enc = base64_stream_encode_plain; codec->dec = base64_stream_decode_plain; + codec->dec16 = base64_stream_decode16_plain; + codec->cvt = base64_cvt_plain; return true; } if (flags & BASE64_FORCE_SSSE3) { codec->enc = base64_stream_encode_ssse3; 
codec->dec = base64_stream_decode_ssse3; + codec->dec16 = base64_stream_decode16_ssse3; + codec->cvt = base64_cvt_ssse3; return true; } if (flags & BASE64_FORCE_SSE41) { codec->enc = base64_stream_encode_sse41; codec->dec = base64_stream_decode_sse41; + codec->dec16 = base64_stream_decode16_sse41; + codec->cvt = base64_cvt_sse41; return true; } if (flags & BASE64_FORCE_SSE42) { codec->enc = base64_stream_encode_sse42; codec->dec = base64_stream_decode_sse42; + codec->dec16 = base64_stream_decode16_sse42; + codec->cvt = base64_cvt_sse42; return true; } if (flags & BASE64_FORCE_AVX) { codec->enc = base64_stream_encode_avx; codec->dec = base64_stream_decode_avx; + codec->dec16 = base64_stream_decode16_avx; + codec->cvt = base64_cvt_avx; return true; } return false; @@ -141,9 +163,11 @@ codec_choose_arm (struct codec *codec) #if defined(__aarch64__) && HAVE_NEON64 codec->enc = base64_stream_encode_neon64; codec->dec = base64_stream_decode_neon64; + codec->cvt = base64_cvt_neon64; #else codec->enc = base64_stream_encode_neon32; codec->dec = base64_stream_decode_neon32; + codec->cvt = base64_cvt_neon32; #endif return true; @@ -192,6 +216,7 @@ codec_choose_x86 (struct codec *codec) if (ebx & bit_AVX2) { codec->enc = base64_stream_encode_avx2; codec->dec = base64_stream_decode_avx2; + codec->cvt = base64_cvt_avx2; return true; } #endif @@ -199,6 +224,7 @@ codec_choose_x86 (struct codec *codec) if (ecx & bit_AVX) { codec->enc = base64_stream_encode_avx; codec->dec = base64_stream_decode_avx; + codec->cvt = base64_cvt_avx; return true; } #endif @@ -214,6 +240,7 @@ codec_choose_x86 (struct codec *codec) if (ecx & bit_SSE42) { codec->enc = base64_stream_encode_sse42; codec->dec = base64_stream_decode_sse42; + codec->cvt = base64_cvt_sse42; return true; } } @@ -226,6 +253,7 @@ codec_choose_x86 (struct codec *codec) if (ecx & bit_SSE41) { codec->enc = base64_stream_encode_sse41; codec->dec = base64_stream_decode_sse41; + codec->cvt = base64_cvt_sse41; return true; } } @@ -238,6 
+266,7 @@ codec_choose_x86 (struct codec *codec) if (ecx & bit_SSSE3) { codec->enc = base64_stream_encode_ssse3; codec->dec = base64_stream_decode_ssse3; + codec->cvt = base64_cvt_ssse3; return true; } } @@ -267,4 +296,5 @@ codec_choose (struct codec *codec, int flags) } codec->enc = base64_stream_encode_plain; codec->dec = base64_stream_decode_plain; + codec->cvt = base64_cvt_plain; } diff --git a/lib/codecs.h b/lib/codecs.h index 11ed7829..9bcc998e 100644 --- a/lib/codecs.h +++ b/lib/codecs.h @@ -15,6 +15,20 @@ , char *out \ , size_t *outlen \ ) +#define BASE64_DEC16_PARAMS \ + ( struct base64_state *state \ + , const uint16_t *src \ + , size_t srclen \ + , char *out \ + , size_t *outlen \ + ) + +// Function parameters for conversion functions: +#define BASE64_CVT_PARAMS \ + ( const uint16_t *src \ + , char *dst \ + , size_t len \ + ) // Function signature for encoding functions: #define BASE64_ENC_FUNCTION(arch) \ @@ -28,6 +42,17 @@ base64_stream_decode_ ## arch \ BASE64_DEC_PARAMS +#define BASE64_DEC16_FUNCTION(arch) \ + int \ + base64_stream_decode16_ ## arch \ + BASE64_DEC16_PARAMS + +// Function signature for conversion functions: +#define BASE64_CVT_FUNCTION(arch) \ + void \ + base64_cvt_ ## arch \ + BASE64_CVT_PARAMS + // Cast away unused variable, silence compiler: #define UNUSED(x) ((void)(x)) @@ -50,10 +75,18 @@ \ return -1; +// Stub function when conversion arch unsupported: +#define BASE64_CVT_STUB \ + UNUSED(src); \ + UNUSED(dst); \ + UNUSED(len); + struct codec { - void (* enc) BASE64_ENC_PARAMS; - int (* dec) BASE64_DEC_PARAMS; + void (* enc) BASE64_ENC_PARAMS; + int (* dec) BASE64_DEC_PARAMS; + int (* dec16) BASE64_DEC16_PARAMS; + void (* cvt) BASE64_CVT_PARAMS; }; // Define machine endianness. 
This is for GCC: diff --git a/lib/lib.c b/lib/lib.c index c02e2db9..1abbc87b 100644 --- a/lib/lib.c +++ b/lib/lib.c @@ -120,6 +120,44 @@ base64_stream_decode return codec.dec(state, src, srclen, out, outlen); } +int +base64_stream_decode16 + ( struct base64_state *state + , const uint16_t *src + , size_t srclen + , char *out + , size_t *outlen + ) +{ + int ret = 1; + size_t asciilen = (srclen > (4U * 1024U)) ? (4U * 1024U) : srclen; + char ascii[4U * 1024U] __attribute__((aligned(64))); + size_t outl = 0U; + // Native UTF-16 decoders stay disabled (note the constant 0); the convert-then-decode path below always runs: + if (0 && codec.dec16) { + return codec.dec16(state, src, srclen, out, outlen); + } + // Narrow the input to 8-bit chars and decode it in chunks of at most sizeof(ascii) code units, stopping at the first non-success result: + while ((ret > 0) && (asciilen < srclen)) + { + size_t outlenround = 0U; + codec.cvt(src, ascii, asciilen); + ret = codec.dec(state, ascii, asciilen, out, &outlenround); + src += asciilen; + srclen -= asciilen; + out += outlenround; + outl += outlenround; + } + if (ret > 0) { + size_t outlenround = 0U; + codec.cvt(src, ascii, srclen); + ret = codec.dec(state, ascii, srclen, out, &outlenround); + outl += outlenround; + } + *outlen = outl; + return ret; +} + #ifdef _OPENMP // Due to the overhead of initializing OpenMP and creating a team of @@ -186,3 +224,21 @@ base64_decode // Feed the whole string to the stream reader: return base64_stream_decode(&state, src, srclen, out, outlen); } + +int base64_decode16 + ( const uint16_t *src + , size_t srclen + , char *out + , size_t *outlen + , int flags + ) +{ + struct base64_state state; + + // Init the stream reader: + base64_stream_decode_init(&state, flags); + + // Feed the whole string to the stream reader: + return base64_stream_decode16(&state, src, srclen, out, outlen); +} + diff --git a/test/benchmark.c b/test/benchmark.c index 90ea380f..ca0c6ec2 100644 --- a/test/benchmark.c +++ b/test/benchmark.c @@ -24,8 +24,10 @@ struct buffers { char *reg; char *enc; + uint16_t *enc16; size_t regsz; size_t encsz; + bool enc16ready; }; // Define buffer sizes to test with: @@ -36,11 +38,11 @@ static struct bufsize { int batch; } sizes[] = { - { "10 
MB", MB * 10, 10, 1 }, - { "1 MB", MB * 1, 10, 10 }, - { "100 KB", KB * 100, 10, 100 }, - { "10 KB", KB * 10, 100, 100 }, - { "1 KB", KB * 1, 100, 1000 }, + { "10 MB", MB * 10, 100, 1 }, + { "1 MB", MB * 1, 100, 10 }, + { "100 KB", KB * 100, 100, 100 }, + { "10 KB", KB * 10, 1000, 100 }, + { "1 KB", KB * 1, 1000, 1000 }, }; static inline float @@ -134,7 +136,7 @@ codec_bench_enc (struct buffers *b, const struct bufsize *bs, const char *name, fastest = timediff; } - printf("%s\tencode\t%.02f MB/sec\n", name, bytes_to_mb(b->regsz) / fastest); + //printf("%-6s encode %8.2f MB/sec\n", name, bytes_to_mb(b->regsz) / fastest); } static void @@ -163,14 +165,56 @@ codec_bench_dec (struct buffers *b, const struct bufsize *bs, const char *name, fastest = timediff; } - printf("%s\tdecode\t%.02f MB/sec\n", name, bytes_to_mb(b->encsz) / fastest); + printf("%-6s decode %8.2f MB/sec\n", name, bytes_to_mb(b->encsz) / fastest); +} + +static void +codec_bench_dec16 (struct buffers *b, const struct bufsize *bs, const char *name, unsigned int flags) +{ + float timediff, fastest = -1.0f; + base64_timespec start, end; + + // Reset buffer size: + b->encsz = bs->len; + + // Repeat benchmark a number of times for a fair test: + for (int i = bs->repeat; i; i--) { + + // Timing loop, use batches to increase timer resolution: + base64_gettime(&start); + for (int j = bs->batch; j; j--) + base64_decode16(b->enc16, b->encsz, b->reg, &b->regsz, flags); + base64_gettime(&end); + + // Calculate average time of batch: + timediff = timediff_sec(&start, &end) / bs->batch; + + // Update fastest time seen: + if (fastest < 0.0f || timediff < fastest) + fastest = timediff; + } + + printf("%-6s decode16 %8.2f MB/sec\n", name, bytes_to_mb(b->encsz) / fastest); +} + +static void +codec_bench_prepare_enc16 (struct buffers *b) +{ + for (size_t i = 0U; i < b->encsz; ++i) { + b->enc16[i] = (uint8_t)b->enc[i]; + } } static void codec_bench (struct buffers *b, const struct bufsize *bs, const char *name, unsigned 
int flags) { codec_bench_enc(b, bs, name, flags); + if (!b->enc16ready) { + codec_bench_prepare_enc16(b); + b->enc16ready = true; + } codec_bench_dec(b, bs, name, flags); + codec_bench_dec16(b, bs, name, flags); } int @@ -183,6 +227,7 @@ main () // Set buffer sizes to largest buffer length: b.regsz = sizes[0].len; b.encsz = sizes[0].len * 5 / 3; + b.enc16ready = false; // Allocate space for megabytes of random data: if ((b.reg = malloc(b.regsz)) == NULL) { @@ -198,10 +243,17 @@ main () goto err1; } + // Allocate space for UTF-16 encoded output: + if ((b.enc16 = malloc(b.encsz * sizeof(uint16_t))) == NULL) { + errmsg = "Out of memory"; + ret = 1; + goto err2; + } + // Fill buffer with random data: if (get_random_data(&b, &errmsg) == false) { ret = 1; - goto err2; + goto err3; } // Loop over all buffer sizes: @@ -216,6 +268,7 @@ main () }; // Free memory: +err3: free(b.enc16); err2: free(b.enc); err1: free(b.reg); err0: if (errmsg) diff --git a/test/codec_supported.h b/test/codec_supported.h index 5ece82f4..18baec08 100644 --- a/test/codec_supported.h +++ b/test/codec_supported.h @@ -1,3 +1,3 @@ -char **codecs; +extern char **codecs; int codec_supported (int flags); diff --git a/test/moby_dick.h b/test/moby_dick.h index 19705355..1d03b6fa 100644 --- a/test/moby_dick.h +++ b/test/moby_dick.h @@ -39,3 +39,67 @@ static const char *moby_dick_base64 = "0IGtuZXcKaXQsIGFsbW9zdCBhbGwgbWVuIGluIHRoZWlyIGRlZ3JlZSwgc29tZSB0aW1l" "IG9yIG90aGVyLCBjaGVyaXNoIHZlcnkKbmVhcmx5IHRoZSBzYW1lIGZlZWxpbmdzIHRvd" "2FyZHMgdGhlIG9jZWFuIHdpdGggbWUuCg=="; + +static const char *moby_dick_base64_16u = + "Q\0""2\0""F\0""s\0""b\0""C\0""B\0""t\0""Z\0""S\0""B\0""J\0""c\0""2\0""h\0""t\0""Y\0""W\0""V\0""s\0""L\0""i\0""B\0""T\0" + "b\0""2\0""1\0""l\0""I\0""H\0""l\0""l\0""Y\0""X\0""J\0""z\0""I\0""G\0""F\0""n\0""b\0""y\0""0\0""t\0""b\0""m\0""V\0""2\0" + "Z\0""X\0""I\0""g\0""b\0""W\0""l\0""u\0""Z\0""C\0""B\0""o\0""b\0""3\0""c\0""g\0""b\0""G\0""9\0""u\0""Z\0""y\0""B\0""w\0" + 
"c\0""m\0""V\0""j\0""a\0""X\0""N\0""l\0""b\0""H\0""k\0""t\0""L\0""W\0""h\0""h\0""d\0""m\0""l\0""u\0""Z\0""w\0""p\0""s\0" + "a\0""X\0""R\0""0\0""b\0""G\0""U\0""g\0""b\0""3\0""I\0""g\0""b\0""m\0""8\0""g\0""b\0""W\0""9\0""u\0""Z\0""X\0""k\0""g\0" + "a\0""W\0""4\0""g\0""b\0""X\0""k\0""g\0""c\0""H\0""V\0""y\0""c\0""2\0""U\0""s\0""I\0""G\0""F\0""u\0""Z\0""C\0""B\0""u\0" + "b\0""3\0""R\0""o\0""a\0""W\0""5\0""n\0""I\0""H\0""B\0""h\0""c\0""n\0""R\0""p\0""Y\0""3\0""V\0""s\0""Y\0""X\0""I\0""g\0" + "d\0""G\0""8\0""g\0""a\0""W\0""5\0""0\0""Z\0""X\0""J\0""l\0""c\0""3\0""Q\0""g\0""b\0""W\0""U\0""g\0""b\0""2\0""4\0""K\0" + "c\0""2\0""h\0""v\0""c\0""m\0""U\0""s\0""I\0""E\0""k\0""g\0""d\0""G\0""h\0""v\0""d\0""W\0""d\0""o\0""d\0""C\0""B\0""J\0" + "I\0""H\0""d\0""v\0""d\0""W\0""x\0""k\0""I\0""H\0""N\0""h\0""a\0""W\0""w\0""g\0""Y\0""W\0""J\0""v\0""d\0""X\0""Q\0""g\0" + "Y\0""S\0""B\0""s\0""a\0""X\0""R\0""0\0""b\0""G\0""U\0""g\0""Y\0""W\0""5\0""k\0""I\0""H\0""N\0""l\0""Z\0""S\0""B\0""0\0" + "a\0""G\0""U\0""g\0""d\0""2\0""F\0""0\0""Z\0""X\0""J\0""5\0""I\0""H\0""B\0""h\0""c\0""n\0""Q\0""g\0""b\0""2\0""Y\0""K\0" + "d\0""G\0""h\0""l\0""I\0""H\0""d\0""v\0""c\0""m\0""x\0""k\0""L\0""i\0""B\0""J\0""d\0""C\0""B\0""p\0""c\0""y\0""B\0""h\0" + "I\0""H\0""d\0""h\0""e\0""S\0""B\0""J\0""I\0""G\0""h\0""h\0""d\0""m\0""U\0""g\0""b\0""2\0""Y\0""g\0""Z\0""H\0""J\0""p\0" + "d\0""m\0""l\0""u\0""Z\0""y\0""B\0""v\0""Z\0""m\0""Y\0""g\0""d\0""G\0""h\0""l\0""I\0""H\0""N\0""w\0""b\0""G\0""V\0""l\0" + "b\0""i\0""B\0""h\0""b\0""m\0""Q\0""g\0""c\0""m\0""V\0""n\0""d\0""W\0""x\0""h\0""d\0""G\0""l\0""u\0""Z\0""w\0""p\0""0\0" + "a\0""G\0""U\0""g\0""Y\0""2\0""l\0""y\0""Y\0""3\0""V\0""s\0""Y\0""X\0""R\0""p\0""b\0""2\0""4\0""u\0""I\0""F\0""d\0""o\0" + "Z\0""W\0""5\0""l\0""d\0""m\0""V\0""y\0""I\0""E\0""k\0""g\0""Z\0""m\0""l\0""u\0""Z\0""C\0""B\0""t\0""e\0""X\0""N\0""l\0" + "b\0""G\0""Y\0""g\0""Z\0""3\0""J\0""v\0""d\0""2\0""l\0""u\0""Z\0""y\0""B\0""n\0""c\0""m\0""l\0""t\0""I\0""G\0""F\0""i\0" + 
"b\0""3\0""V\0""0\0""I\0""H\0""R\0""o\0""Z\0""S\0""B\0""t\0""b\0""3\0""V\0""0\0""a\0""D\0""s\0""K\0""d\0""2\0""h\0""l\0" + "b\0""m\0""V\0""2\0""Z\0""X\0""I\0""g\0""a\0""X\0""Q\0""g\0""a\0""X\0""M\0""g\0""Y\0""S\0""B\0""k\0""Y\0""W\0""1\0""w\0" + "L\0""C\0""B\0""k\0""c\0""m\0""l\0""6\0""e\0""m\0""x\0""5\0""I\0""E\0""5\0""v\0""d\0""m\0""V\0""t\0""Y\0""m\0""V\0""y\0" + "I\0""G\0""l\0""u\0""I\0""G\0""1\0""5\0""I\0""H\0""N\0""v\0""d\0""W\0""w\0""7\0""I\0""H\0""d\0""o\0""Z\0""W\0""5\0""l\0" + "d\0""m\0""V\0""y\0""I\0""E\0""k\0""g\0""Z\0""m\0""l\0""u\0""Z\0""A\0""p\0""t\0""e\0""X\0""N\0""l\0""b\0""G\0""Y\0""g\0" + "a\0""W\0""5\0""2\0""b\0""2\0""x\0""1\0""b\0""n\0""R\0""h\0""c\0""m\0""l\0""s\0""e\0""S\0""B\0""w\0""Y\0""X\0""V\0""z\0" + "a\0""W\0""5\0""n\0""I\0""G\0""J\0""l\0""Z\0""m\0""9\0""y\0""Z\0""S\0""B\0""j\0""b\0""2\0""Z\0""m\0""a\0""W\0""4\0""g\0" + "d\0""2\0""F\0""y\0""Z\0""W\0""h\0""v\0""d\0""X\0""N\0""l\0""c\0""y\0""w\0""g\0""Y\0""W\0""5\0""k\0""I\0""G\0""J\0""y\0" + "a\0""W\0""5\0""n\0""a\0""W\0""5\0""n\0""I\0""H\0""V\0""w\0""C\0""n\0""R\0""o\0""Z\0""S\0""B\0""y\0""Z\0""W\0""F\0""y\0" + "I\0""G\0""9\0""m\0""I\0""G\0""V\0""2\0""Z\0""X\0""J\0""5\0""I\0""G\0""Z\0""1\0""b\0""m\0""V\0""y\0""Y\0""W\0""w\0""g\0" + "S\0""S\0""B\0""t\0""Z\0""W\0""V\0""0\0""O\0""y\0""B\0""h\0""b\0""m\0""Q\0""g\0""Z\0""X\0""N\0""w\0""Z\0""W\0""N\0""p\0" + "Y\0""W\0""x\0""s\0""e\0""S\0""B\0""3\0""a\0""G\0""V\0""u\0""Z\0""X\0""Z\0""l\0""c\0""i\0""B\0""t\0""e\0""S\0""B\0""o\0" + "e\0""X\0""B\0""v\0""c\0""y\0""B\0""n\0""Z\0""X\0""Q\0""K\0""c\0""3\0""V\0""j\0""a\0""C\0""B\0""h\0""b\0""i\0""B\0""1\0" + "c\0""H\0""B\0""l\0""c\0""i\0""B\0""o\0""Y\0""W\0""5\0""k\0""I\0""G\0""9\0""m\0""I\0""G\0""1\0""l\0""L\0""C\0""B\0""0\0" + "a\0""G\0""F\0""0\0""I\0""G\0""l\0""0\0""I\0""H\0""J\0""l\0""c\0""X\0""V\0""p\0""c\0""m\0""V\0""z\0""I\0""G\0""E\0""g\0" + "c\0""3\0""R\0""y\0""b\0""2\0""5\0""n\0""I\0""G\0""1\0""v\0""c\0""m\0""F\0""s\0""I\0""H\0""B\0""y\0""a\0""W\0""5\0""j\0" + 
"a\0""X\0""B\0""s\0""Z\0""S\0""B\0""0\0""b\0""w\0""p\0""w\0""c\0""m\0""V\0""2\0""Z\0""W\0""5\0""0\0""I\0""G\0""1\0""l\0" + "I\0""G\0""Z\0""y\0""b\0""2\0""0\0""g\0""Z\0""G\0""V\0""s\0""a\0""W\0""J\0""l\0""c\0""m\0""F\0""0\0""Z\0""W\0""x\0""5\0" + "I\0""H\0""N\0""0\0""Z\0""X\0""B\0""w\0""a\0""W\0""5\0""n\0""I\0""G\0""l\0""u\0""d\0""G\0""8\0""g\0""d\0""G\0""h\0""l\0" + "I\0""H\0""N\0""0\0""c\0""m\0""V\0""l\0""d\0""C\0""w\0""g\0""Y\0""W\0""5\0""k\0""I\0""G\0""1\0""l\0""d\0""G\0""h\0""v\0" + "Z\0""G\0""l\0""j\0""Y\0""W\0""x\0""s\0""e\0""Q\0""p\0""r\0""b\0""m\0""9\0""j\0""a\0""2\0""l\0""u\0""Z\0""y\0""B\0""w\0" + "Z\0""W\0""9\0""w\0""b\0""G\0""U\0""n\0""c\0""y\0""B\0""o\0""Y\0""X\0""R\0""z\0""I\0""G\0""9\0""m\0""Z\0""i\0""0\0""t\0" + "d\0""G\0""h\0""l\0""b\0""i\0""w\0""g\0""S\0""S\0""B\0""h\0""Y\0""2\0""N\0""v\0""d\0""W\0""5\0""0\0""I\0""G\0""l\0""0\0" + "I\0""G\0""h\0""p\0""Z\0""2\0""g\0""g\0""d\0""G\0""l\0""t\0""Z\0""S\0""B\0""0\0""b\0""y\0""B\0""n\0""Z\0""X\0""Q\0""g\0" + "d\0""G\0""8\0""g\0""c\0""2\0""V\0""h\0""C\0""m\0""F\0""z\0""I\0""H\0""N\0""v\0""b\0""2\0""4\0""g\0""Y\0""X\0""M\0""g\0" + "S\0""S\0""B\0""j\0""Y\0""W\0""4\0""u\0""I\0""F\0""R\0""o\0""a\0""X\0""M\0""g\0""a\0""X\0""M\0""g\0""b\0""X\0""k\0""g\0" + "c\0""3\0""V\0""i\0""c\0""3\0""R\0""p\0""d\0""H\0""V\0""0\0""Z\0""S\0""B\0""m\0""b\0""3\0""I\0""g\0""c\0""G\0""l\0""z\0" + "d\0""G\0""9\0""s\0""I\0""G\0""F\0""u\0""Z\0""C\0""B\0""i\0""Y\0""W\0""x\0""s\0""L\0""i\0""B\0""X\0""a\0""X\0""R\0""o\0" + "I\0""G\0""E\0""K\0""c\0""G\0""h\0""p\0""b\0""G\0""9\0""z\0""b\0""3\0""B\0""o\0""a\0""W\0""N\0""h\0""b\0""C\0""B\0""m\0" + "b\0""G\0""9\0""1\0""c\0""m\0""l\0""z\0""a\0""C\0""B\0""D\0""Y\0""X\0""R\0""v\0""I\0""H\0""R\0""o\0""c\0""m\0""9\0""3\0" + "c\0""y\0""B\0""o\0""a\0""W\0""1\0""z\0""Z\0""W\0""x\0""m\0""I\0""H\0""V\0""w\0""b\0""2\0""4\0""g\0""a\0""G\0""l\0""z\0" + "I\0""H\0""N\0""3\0""b\0""3\0""J\0""k\0""O\0""y\0""B\0""J\0""I\0""H\0""F\0""1\0""a\0""W\0""V\0""0\0""b\0""H\0""k\0""K\0" + 
"d\0""G\0""F\0""r\0""Z\0""S\0""B\0""0\0""b\0""y\0""B\0""0\0""a\0""G\0""U\0""g\0""c\0""2\0""h\0""p\0""c\0""C\0""4\0""g\0" + "V\0""G\0""h\0""l\0""c\0""m\0""U\0""g\0""a\0""X\0""M\0""g\0""b\0""m\0""9\0""0\0""a\0""G\0""l\0""u\0""Z\0""y\0""B\0""z\0" + "d\0""X\0""J\0""w\0""c\0""m\0""l\0""z\0""a\0""W\0""5\0""n\0""I\0""G\0""l\0""u\0""I\0""H\0""R\0""o\0""a\0""X\0""M\0""u\0" + "I\0""E\0""l\0""m\0""I\0""H\0""R\0""o\0""Z\0""X\0""k\0""g\0""Y\0""n\0""V\0""0\0""I\0""G\0""t\0""u\0""Z\0""X\0""c\0""K\0" + "a\0""X\0""Q\0""s\0""I\0""G\0""F\0""s\0""b\0""W\0""9\0""z\0""d\0""C\0""B\0""h\0""b\0""G\0""w\0""g\0""b\0""W\0""V\0""u\0" + "I\0""G\0""l\0""u\0""I\0""H\0""R\0""o\0""Z\0""W\0""l\0""y\0""I\0""G\0""R\0""l\0""Z\0""3\0""J\0""l\0""Z\0""S\0""w\0""g\0" + "c\0""2\0""9\0""t\0""Z\0""S\0""B\0""0\0""a\0""W\0""1\0""l\0""I\0""G\0""9\0""y\0""I\0""G\0""9\0""0\0""a\0""G\0""V\0""y\0" + "L\0""C\0""B\0""j\0""a\0""G\0""V\0""y\0""a\0""X\0""N\0""o\0""I\0""H\0""Z\0""l\0""c\0""n\0""k\0""K\0""b\0""m\0""V\0""h\0" + "c\0""m\0""x\0""5\0""I\0""H\0""R\0""o\0""Z\0""S\0""B\0""z\0""Y\0""W\0""1\0""l\0""I\0""G\0""Z\0""l\0""Z\0""W\0""x\0""p\0" + "b\0""m\0""d\0""z\0""I\0""H\0""R\0""v\0""d\0""2\0""F\0""y\0""Z\0""H\0""M\0""g\0""d\0""G\0""h\0""l\0""I\0""G\0""9\0""j\0" + "Z\0""W\0""F\0""u\0""I\0""H\0""d\0""p\0""d\0""G\0""g\0""g\0""b\0""W\0""U\0""u\0""C\0""g\0""=\0""=\0"; diff --git a/test/test_base64.c b/test/test_base64.c index 15996914..1c87b420 100644 --- a/test/test_base64.c +++ b/test/test_base64.c @@ -57,6 +57,32 @@ assert_dec (int flags, const char *src, const char *dst) return false; } +static bool +assert_dec16 (int flags, const char *src, const char *src16, const char *dst) +{ + size_t srclen = strlen(src); + size_t dstlen = strlen(dst); + + if (!base64_decode16((const uint16_t*)src16, srclen, out, &outlen, flags)) { + printf("FAIL: decoding of '%s': decoding error\n", src); + return true; + } + if (outlen != dstlen) { + printf("FAIL: encoding of '%s': " + "length expected %lu, got %lu\n", src, + (unsigned 
long)dstlen, + (unsigned long)outlen + ); + return true; + } + if (strncmp(dst, out, outlen) != 0) { + out[outlen] = '\0'; + printf("FAIL: decoding of '%s': expected output '%s', got '%s'\n", src, dst, out); + return true; + } + return false; +} + static int assert_roundtrip (int flags, const char *src) { @@ -292,22 +318,50 @@ test_one_codec (const char *codec, int flags) struct { const char *in; const char *out; + const char *out16; } vec[] = { // These are the test vectors from RFC4648: - { "", "" }, - { "f", "Zg==" }, - { "fo", "Zm8=" }, - { "foo", "Zm9v" }, - { "foob", "Zm9vYg==" }, - { "fooba", "Zm9vYmE=" }, - { "foobar", "Zm9vYmFy" }, + { "", "" , "\0" }, + { "f", "Zg==" , "Z\0g\0=\0=\0" }, + { "fo", "Zm8=" , "Z\0m\0""8\0=\0" }, + { "foo", "Zm9v" , "Z\0m\0""9\0v\0" }, + { "foob", "Zm9vYg==", "Z\0m\0""9\0v\0Y\0g\0=\0=\0" }, + { "fooba", "Zm9vYmE=", "Z\0m\0""9\0v\0Y\0m\0E\0=\0" }, + { "foobar", "Zm9vYmFy", "Z\0m\0""9\0v\0Y\0m\0F\0y\0" }, // The first paragraph from Moby Dick, // to test the SIMD codecs with larger blocksize: - { moby_dick_plain, moby_dick_base64 }, + { moby_dick_plain, moby_dick_base64, moby_dick_base64_16u }, }; +#if 0 + size_t c = 0; + const char * pSrc = moby_dick_base64; + putc('\t', stdout); + putc('\"', stdout); + while (*pSrc) { + int character = *pSrc++; + putc(character, stdout); + putc('\\', stdout); + putc('0', stdout); + putc('\"', stdout); + putc('\"', stdout); + + c++; + if (c == 24U) { + putc('\"', stdout); + putc('\n', stdout); + putc('\t', stdout); + putc('\"', stdout); + + c = 0U; + } + } + putc('\"', stdout); + putc('\n', stdout); +#endif + for (size_t i = 0; i < sizeof(vec) / sizeof(vec[0]); i++) { // Encode plain string, check against output: @@ -315,6 +369,7 @@ test_one_codec (const char *codec, int flags) // Decode the output string, check if we get the input: fail |= assert_dec(flags, vec[i].out, vec[i].in); + fail |= assert_dec16(flags, vec[i].out, vec[i].out16, vec[i].in); // Do a roundtrip on the inputs and the 
outputs: fail |= assert_roundtrip(flags, vec[i].in);