Skip to content

Commit

Permalink
Add decode16 POC
Browse files Browse the repository at this point in the history
POC for aklomp#15
  • Loading branch information
mayeut committed Oct 22, 2016
1 parent fb336ed commit 4ac6ea0
Show file tree
Hide file tree
Showing 25 changed files with 630 additions and 29 deletions.
18 changes: 18 additions & 0 deletions include/libbase64.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
#ifndef _LIBBASE64_H
#define _LIBBASE64_H

#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif
Expand Down Expand Up @@ -80,6 +82,14 @@ int base64_decode
, int flags
) ;

/* Variant of base64_decode() whose input is an array of 16-bit code units
 * (`const uint16_t *src`) instead of bytes. Decoded bytes are written to
 * `out`, with the output length reported through `outlen`.
 * NOTE(review): `srclen` presumably counts 16-bit units rather than bytes, and
 * the return convention presumably matches base64_decode(); confirm against
 * the implementation. See above for `flags`. */
int base64_decode16
( const uint16_t *src
, size_t srclen
, char *out
, size_t *outlen
, int flags
) ;

/* Call this before calling base64_stream_decode() to init the state. See above
* for `flags`; set to 0 for default operation: */
void base64_stream_decode_init
Expand All @@ -103,6 +113,14 @@ int base64_stream_decode
, size_t *outlen
) ;

/* Variant of base64_stream_decode() whose input chunk is an array of 16-bit
 * code units (`const uint16_t *src`) instead of bytes. Takes the caller's
 * `state`, writes decoded bytes to `out` and reports the output length
 * through `outlen`.
 * NOTE(review): `srclen` presumably counts 16-bit units rather than bytes, and
 * the state is presumably prepared with base64_stream_decode_init(); confirm
 * against the implementation. */
int base64_stream_decode16
( struct base64_state *state
, const uint16_t *src
, size_t srclen
, char *out
, size_t *outlen
) ;

#ifdef __cplusplus
}
#endif
Expand Down
35 changes: 35 additions & 0 deletions lib/arch/avx/codec.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
#include "../ssse3/enc_translate.c"
#include "../sse41/enc_reshuffle.c"

#include "../generic/convert.c"

#endif // __AVX__

BASE64_ENC_FUNCTION(avx)
Expand All @@ -28,6 +30,9 @@ BASE64_ENC_FUNCTION(avx)
#endif
}

#define STRING_TYPE uint8_t
#define CHAR_CONVERT(x) (x)
#define LOAD_STRING(c) LOAD_STRING8(c)
BASE64_DEC_FUNCTION(avx)
{
#ifdef __AVX__
Expand All @@ -38,3 +43,33 @@ BASE64_DEC_FUNCTION(avx)
BASE64_DEC_STUB
#endif
}
#undef LOAD_STRING
#undef CHAR_CONVERT
#undef STRING_TYPE

// 16-bit-input instantiation of the AVX decoder. The shared decoder fragments
// are parameterized through three macros:
//   STRING_TYPE  - element type of the source pointer (16-bit units here),
//   CHAR_CONVERT - narrows one unit to a byte via convert() before the scalar
//                  table lookups in the head/tail fragments,
//   LOAD_STRING  - vector load; LOAD_STRING16 packs 16-bit units down to bytes
//                  (presumably consumed by the included vector loop).
#define STRING_TYPE uint16_t
#define CHAR_CONVERT(x) convert(x)
#define LOAD_STRING(c) LOAD_STRING16(c)
BASE64_DEC16_FUNCTION(avx)
{
#ifdef __AVX__
#include "../generic/dec_head.c"
#include "../sse42/dec_loop.c"
#include "../generic/dec_tail.c"
#else
// Built without AVX support: the function body is the decoder stub macro.
BASE64_DEC_STUB
#endif
}
// Drop the parameter macros so they do not leak past this instantiation.
#undef LOAD_STRING
#undef CHAR_CONVERT
#undef STRING_TYPE

// 16-bit to 8-bit conversion routine for the AVX codec. The body is built from
// two fragments, in order: the SSE2 loop narrows the input 16 units at a time,
// then the generic scalar loop converts whatever is left.
// NOTE(review): the fragments use `src`, `dst` and `len`, presumably declared
// by BASE64_CVT_FUNCTION; confirm in codecs.h.
BASE64_CVT_FUNCTION(avx)
{
#ifdef __AVX__
#include "../sse2/convert_loop.c"
#include "../generic/convert_loop.c"
#else
// Built without AVX support: the function body is the conversion stub macro.
BASE64_CVT_STUB
#endif
}
35 changes: 35 additions & 0 deletions lib/arch/avx2/codec.c
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,8 @@ dec_reshuffle (__m256i in)
0, 1, 2, 4, 5, 6, -1, -1));
}

#include "../generic/convert.c"

#endif // __AVX2__

BASE64_ENC_FUNCTION(avx2)
Expand All @@ -143,6 +145,9 @@ BASE64_ENC_FUNCTION(avx2)
#endif
}

#define STRING_TYPE uint8_t
#define CHAR_CONVERT(x) (x)
#define LOAD_STRING(c) _mm256_loadu_si256((__m256i *)c)
BASE64_DEC_FUNCTION(avx2)
{
#ifdef __AVX2__
Expand All @@ -153,3 +158,33 @@ BASE64_DEC_FUNCTION(avx2)
BASE64_DEC_STUB
#endif
}
#undef LOAD_STRING
#undef CHAR_CONVERT
#undef STRING_TYPE

// 16-bit-input instantiation of the AVX2 decoder. STRING_TYPE sets the source
// element type and CHAR_CONVERT narrows one unit via convert() for the scalar
// head/tail fragments.
// LOAD_STRING reads 32 consecutive 16-bit units as two unaligned 256-bit loads
// and packs them to 32 bytes. _mm256_packus_epi16 packs within each 128-bit
// lane, which interleaves the two inputs by lane; the 64-bit permute with 0xD8
// (quadword order 0,2,1,3) puts the bytes back into source order.
// NOTE(review): packus treats its inputs as signed 16-bit, so units >= 0x8000
// pack to 0x00 rather than 0xFF; this is harmless only if byte 0 is rejected by
// the decoder. Confirm against the decode table.
#define STRING_TYPE uint16_t
#define CHAR_CONVERT(x) convert(x)
#define LOAD_STRING(c) _mm256_permute4x64_epi64(_mm256_packus_epi16( _mm256_loadu_si256((__m256i *)(c+0)),_mm256_loadu_si256((__m256i *)(c+16))), 0xD8)
BASE64_DEC16_FUNCTION(avx2)
{
#ifdef __AVX2__
#include "../generic/dec_head.c"
#include "dec_loop.c"
#include "../generic/dec_tail.c"
#else
// Built without AVX2 support: the function body is the decoder stub macro.
BASE64_DEC_STUB
#endif
}
// Drop the parameter macros so they do not leak past this instantiation.
#undef LOAD_STRING
#undef CHAR_CONVERT
#undef STRING_TYPE

// 16-bit to 8-bit conversion routine for the AVX2 codec. It reuses the 128-bit
// SSE2 loop (16 units per step) and then the generic scalar loop for the
// remainder; no 256-bit conversion loop is included here.
// NOTE(review): the fragments use `src`, `dst` and `len`, presumably declared
// by BASE64_CVT_FUNCTION; confirm in codecs.h.
BASE64_CVT_FUNCTION(avx2)
{
#ifdef __AVX2__
#include "../sse2/convert_loop.c"
#include "../generic/convert_loop.c"
#else
// Built without AVX2 support: the function body is the conversion stub macro.
BASE64_CVT_STUB
#endif
}
2 changes: 1 addition & 1 deletion lib/arch/avx2/dec_loop.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
while (srclen >= 45)
{
// Load string:
__m256i str = _mm256_loadu_si256((__m256i *)c);
__m256i str = LOAD_STRING(c);

// The input consists of six character sets in the Base64 alphabet,
// which we need to map back to the 6-bit values they represent.
Expand Down
21 changes: 21 additions & 0 deletions lib/arch/generic/codec.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
#include "../../../include/libbase64.h"
#include "../../codecs.h"

#include "../generic/convert.c"

BASE64_ENC_FUNCTION(plain)
{
#include "enc_head.c"
Expand All @@ -16,6 +18,8 @@ BASE64_ENC_FUNCTION(plain)
#include "enc_tail.c"
}

#define STRING_TYPE uint8_t
#define CHAR_CONVERT(x) (x)
BASE64_DEC_FUNCTION(plain)
{
#include "dec_head.c"
Expand All @@ -26,3 +30,20 @@ BASE64_DEC_FUNCTION(plain)
#endif
#include "dec_tail.c"
}
#undef CHAR_CONVERT
#undef STRING_TYPE

// 16-bit-input instantiation of the plain (portable) decoder. STRING_TYPE makes
// the source pointer step over 16-bit units and CHAR_CONVERT narrows each unit
// via convert() before it is used as a decode-table index.
// Only the head and tail fragments are included, with no bulk decode loop
// between them, so all input is handled by the tail fragment.
#define STRING_TYPE uint16_t
#define CHAR_CONVERT(x) convert(x)
BASE64_DEC16_FUNCTION(plain)
{
#include "dec_head.c"
#include "dec_tail.c"
}
// Drop the parameter macros so they do not leak past this instantiation.
#undef CHAR_CONVERT
#undef STRING_TYPE

// Portable 16-bit to 8-bit conversion routine: the body is just the scalar
// loop, which narrows one unit at a time with convert().
// NOTE(review): the fragment uses `src`, `dst` and `len`, presumably declared
// by BASE64_CVT_FUNCTION; confirm in codecs.h.
BASE64_CVT_FUNCTION(plain)
{
#include "convert_loop.c"
}
13 changes: 13 additions & 0 deletions lib/arch/generic/convert.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
// Narrow one 16-bit code unit to a byte with unsigned saturation: values that
// fit in 8 bits pass through unchanged, anything larger becomes 255.
static inline uint8_t
convert (const uint16_t in)
{
	const unsigned limit = 255U;
	const unsigned wide = in;

	if (wide > limit) {
		return (uint8_t)limit;
	}
	return (uint8_t)wide;
}
4 changes: 4 additions & 0 deletions lib/arch/generic/convert_loop.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
while (len > 0U) {
*dst++ = (char)convert(*src++);
len--;
}
4 changes: 2 additions & 2 deletions lib/arch/generic/dec_head.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
int ret = 0;
const uint8_t *c = (const uint8_t *)src;
const STRING_TYPE *c = (const STRING_TYPE *)src;
uint8_t *o = (uint8_t *)out;
uint8_t q;

Expand All @@ -17,7 +17,7 @@ if (st.eof) {
// If there was a trailing '=' to check, check it:
if (srclen && (st.eof == BASE64_AEOF)) {
st.eof = BASE64_EOF;
ret = (base64_table_dec[*c++] == 254) ? 1 : 0;
ret = (base64_table_dec[CHAR_CONVERT(*c++)] == 254) ? 1 : 0;
}
return ret;
}
Expand Down
10 changes: 5 additions & 5 deletions lib/arch/generic/dec_tail.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
ret = 1;
break;
}
if ((q = base64_table_dec[*c++]) >= 254) {
if ((q = base64_table_dec[CHAR_CONVERT(*c++)]) >= 254) {
st.eof = BASE64_EOF;
// Treat character '=' as invalid for byte 0:
break;
Expand All @@ -14,7 +14,7 @@
ret = 1;
break;
}
if ((q = base64_table_dec[*c++]) >= 254) {
if ((q = base64_table_dec[CHAR_CONVERT(*c++)]) >= 254) {
st.eof = BASE64_EOF;
// Treat character '=' as invalid for byte 1:
break;
Expand All @@ -28,14 +28,14 @@
ret = 1;
break;
}
if ((q = base64_table_dec[*c++]) >= 254) {
if ((q = base64_table_dec[CHAR_CONVERT(*c++)]) >= 254) {
// When q == 254, the input char is '='.
// Check if next byte is also '=':
if (q == 254) {
if (srclen-- != 0) {
// EOF:
st.eof = BASE64_EOF;
ret = (base64_table_dec[*c++] == 254) ? 1 : 0;
ret = (base64_table_dec[CHAR_CONVERT(*c++)] == 254) ? 1 : 0;
break;
}
else {
Expand All @@ -58,7 +58,7 @@
ret = 1;
break;
}
if ((q = base64_table_dec[*c++]) >= 254) {
if ((q = base64_table_dec[CHAR_CONVERT(*c++)]) >= 254) {
st.eof = BASE64_EOF;
// When q == 254, the input char is '='. Return 1 and EOF.
// When q == 255, the input char is invalid. Return 0 and EOF.
Expand Down
12 changes: 12 additions & 0 deletions lib/arch/neon32/codec.c
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,8 @@ enc_translate (uint8x16x4_t in)
return out;
}

#include "../generic/convert.c"

#endif

// Stride size is so large on these NEON 32-bit functions
Expand Down Expand Up @@ -140,3 +142,13 @@ BASE64_DEC_FUNCTION(neon32)
BASE64_DEC_STUB
#endif
}


// 16-bit to 8-bit conversion routine for the 32-bit NEON codec. Only the
// generic scalar loop is included; there is no NEON-specific conversion loop.
// NOTE(review): the fragment uses `src`, `dst` and `len`, presumably declared
// by BASE64_CVT_FUNCTION; confirm in codecs.h.
BASE64_CVT_FUNCTION(neon32)
{
#if (defined(__arm__) && defined(__ARM_NEON__))
#include "../generic/convert_loop.c"
#else
// Not a 32-bit ARM NEON build: the function body is the conversion stub macro.
BASE64_CVT_STUB
#endif
}
12 changes: 12 additions & 0 deletions lib/arch/neon64/codec.c
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ static const char *base64_table_enc_transposed =
"Oeu+"
"Pfv/"
};

#include "../generic/convert.c"

#endif

// Stride size is so large on these NEON 64-bit functions
Expand Down Expand Up @@ -73,3 +76,12 @@ BASE64_DEC_FUNCTION(neon64)
BASE64_DEC_STUB
#endif
}

// 16-bit to 8-bit conversion routine for the 64-bit NEON codec. Only the
// generic scalar loop is included; there is no NEON-specific conversion loop.
// NOTE(review): the fragment uses `src`, `dst` and `len`, presumably declared
// by BASE64_CVT_FUNCTION; confirm in codecs.h.
BASE64_CVT_FUNCTION(neon64)
{
#if (defined(__aarch64__) && defined(__ARM_NEON__))
#include "../generic/convert_loop.c"
#else
// Not an AArch64 NEON build: the function body is the conversion stub macro.
BASE64_CVT_STUB
#endif
}
2 changes: 2 additions & 0 deletions lib/arch/sse2/compare_macros.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,5 @@
// Byte-wise helpers over a 128-bit vector `s`; the scalar arguments are
// broadcast to all 16 byte lanes with _mm_set1_epi8.
// CMPEQ: mask with all bits set in every lane of `s` equal to `n`.
#define CMPEQ(s,n) _mm_cmpeq_epi8((s), _mm_set1_epi8(n))
// REPLACE: bitwise AND of `s` with `n` in every lane; when `s` is a comparison
// mask this yields `n` in the selected lanes and 0 elsewhere.
#define REPLACE(s,n) _mm_and_si128((s), _mm_set1_epi8(n))
// RANGE: mask of lanes with a <= s <= b, built as (s > a - 1) AND NOT (s > b).
// CMPGT is defined above this view, presumably a signed byte compare; confirm.
#define RANGE(s,a,b) _mm_andnot_si128(CMPGT((s), (b)), CMPGT((s), (a) - 1))
// Load 16 input characters starting at `c` into one 128-bit register.
// LOAD_STRING8 reads 16 bytes unchanged. LOAD_STRING16 reads 16 16-bit units
// (two unaligned loads of 8 units each) and narrows them to bytes with
// _mm_packus_epi16, so units in 0x0100..0x7FFF saturate to 0xFF.
// The argument is fully parenthesized so that a pointer expression such as
// `p + 1` is offset in element units before the cast, instead of being offset
// by whole 16-byte vectors after it. The casts keep the source const-qualified.
// NOTE(review): packus treats its inputs as signed 16-bit, so units >= 0x8000
// pack to 0x00 rather than 0xFF; this is harmless only if byte 0 is rejected by
// the decoder. Confirm against the decode table.
#define LOAD_STRING8(c) _mm_loadu_si128((const __m128i *)(c))
#define LOAD_STRING16(c) _mm_packus_epi16(_mm_loadu_si128((const __m128i *)((c) + 0)), _mm_loadu_si128((const __m128i *)((c) + 8)))
53 changes: 53 additions & 0 deletions lib/arch/sse2/convert_loop.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
// Bulk 16-bit to 8-bit narrowing with SSE2, 16 units per 128-bit store.
// Expects `src` (16-bit units), `dst` (bytes) and `len` (remaining units) to be
// in scope. Handles 64 units per iteration, then at most one block of 32 and
// one block of 16, leaving fewer than 16 units for the code that follows.
//
// _mm_packus_epi16 interprets its inputs as *signed* 16-bit values, so a unit
// >= 0x8000 would pack to 0 instead of saturating to 255 as the scalar
// convert() does. Every lane is therefore clamped to 255 before packing, which
// makes the vector and scalar paths agree for all inputs. The clamp computes
// min(v, 255) as v - max(v - 255, 0) using SSE2-only operations. The macro
// evaluates `v` twice, so it is only ever given plain variables.
#define CVT_CLAMP_U8(v) \
	_mm_sub_epi16((v), _mm_subs_epu16((v), _mm_set1_epi16(0x00FF)))

while (len >= 64U) {
	__m128i src0 = _mm_loadu_si128((const __m128i *)(src + 0));
	__m128i src1 = _mm_loadu_si128((const __m128i *)(src + 8));
	__m128i src2 = _mm_loadu_si128((const __m128i *)(src + 16));
	__m128i src3 = _mm_loadu_si128((const __m128i *)(src + 24));
	__m128i src4 = _mm_loadu_si128((const __m128i *)(src + 32));
	__m128i src5 = _mm_loadu_si128((const __m128i *)(src + 40));
	__m128i src6 = _mm_loadu_si128((const __m128i *)(src + 48));
	__m128i src7 = _mm_loadu_si128((const __m128i *)(src + 56));

	__m128i dst0 = _mm_packus_epi16(CVT_CLAMP_U8(src0), CVT_CLAMP_U8(src1));
	__m128i dst1 = _mm_packus_epi16(CVT_CLAMP_U8(src2), CVT_CLAMP_U8(src3));
	__m128i dst2 = _mm_packus_epi16(CVT_CLAMP_U8(src4), CVT_CLAMP_U8(src5));
	__m128i dst3 = _mm_packus_epi16(CVT_CLAMP_U8(src6), CVT_CLAMP_U8(src7));

	_mm_storeu_si128((__m128i *)(dst + 0), dst0);
	_mm_storeu_si128((__m128i *)(dst + 16), dst1);
	_mm_storeu_si128((__m128i *)(dst + 32), dst2);
	_mm_storeu_si128((__m128i *)(dst + 48), dst3);

	len -= 64U;
	src += 64U;
	dst += 64U;
}
// len < 64 here, so bit 5 set means at least 32 units remain.
if (len & 32U) {
	__m128i src0 = _mm_loadu_si128((const __m128i *)(src + 0));
	__m128i src1 = _mm_loadu_si128((const __m128i *)(src + 8));
	__m128i src2 = _mm_loadu_si128((const __m128i *)(src + 16));
	__m128i src3 = _mm_loadu_si128((const __m128i *)(src + 24));

	__m128i dst0 = _mm_packus_epi16(CVT_CLAMP_U8(src0), CVT_CLAMP_U8(src1));
	__m128i dst1 = _mm_packus_epi16(CVT_CLAMP_U8(src2), CVT_CLAMP_U8(src3));

	_mm_storeu_si128((__m128i *)(dst + 0), dst0);
	_mm_storeu_si128((__m128i *)(dst + 16), dst1);

	len -= 32U;
	src += 32U;
	dst += 32U;
}
// len < 32 here, so bit 4 set means at least 16 units remain.
if (len & 16U) {
	__m128i src0 = _mm_loadu_si128((const __m128i *)(src + 0));
	__m128i src1 = _mm_loadu_si128((const __m128i *)(src + 8));

	__m128i dst0 = _mm_packus_epi16(CVT_CLAMP_U8(src0), CVT_CLAMP_U8(src1));

	_mm_storeu_si128((__m128i *)(dst + 0), dst0);

	len -= 16U;
	src += 16U;
	dst += 16U;
}

#undef CVT_CLAMP_U8
Loading

0 comments on commit 4ac6ea0

Please sign in to comment.