diff --git a/.gitignore b/.gitignore index 0187b3d..c792272 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,17 @@ build/ *~ +config.h +config.log +config.status +gen_ccsds.exe +gen_ccsds_tal.exe +gen_ccsds +gen_ccsds_tal +*.o +makefile +*.a +*.def +*.lib +*.exp +ccsds_tab.c +ccsds_tal.c \ No newline at end of file diff --git a/README b/README.md similarity index 60% rename from README rename to README.md index 95253e2..5948c7a 100644 --- a/README +++ b/README.md @@ -1,10 +1,32 @@ +libfec +====== + +This is a version of libfec that compiles on both Linux and Windows. It can also be linked from MSVC projects. + +To compile: + +```bash +./configure +make +``` + +To link it from MSVC, first build it with MinGW as shown above, then run the following in a Visual Studio command prompt: + +``` +lib /machine:i386 /def:libfec.def +``` + +This generates `libfec.lib` and `libfec.exp`. + COPYRIGHT +========= This package is copyright 2006 by Phil Karn, KA9Q. It may be used under the terms of the GNU Lesser General Public License (LGPL). See the file "lesser.txt" in this package for license details. INTRODUCTION +============ This package provides a set of functions that implement several popular forward error correction (FEC) algorithms and several low-level routines @@ -13,10 +35,9 @@ useful in modems implemented with digital signal processing (DSP). The following routines are provided: 1. Viterbi decoders for the following convolutional codes: - -r=1/2 k=7 ("Voyager" code, now a widely used industry standard) -r=1/2 k=9 (Used on the IS-95 CDMA forward link) -r=1/6 k=15 ("Cassini" code, used by several NASA/JPL deep space missions) + * **r=1/2 k=7** ("Voyager" code, now a widely used industry standard) + * **r=1/2 k=9** (Used on the IS-95 CDMA forward link) + * **r=1/6 k=15** ("Cassini" code, used by several NASA/JPL deep space missions) 2. Reed-Solomon encoders and decoders for any user-specified code. @@ -35,6 +56,7 @@ useful in DSP for finding the total energy in a signal. 
scaling a signal to prevent overflow. SIMD SUPPORT +============ This package automatically makes use of various SIMD (Single Instruction stream, Multiple Data stream) instruction sets, when @@ -70,51 +92,51 @@ Intel introduced SSE2 on the Pentium 4, and it has been picked up by later AMD CPUs. SSE support implies MMX support, while SSE2 support implies both SSE and MMX support. -The latest IA-32 SIMD instruction set, SSE3 (also known as "Prescott -New Instructions") was introduced in early 2004 with the latest -("Prescott") revision of the Pentium 4. Relatively little was +The latest IA-32 SIMD instruction set, SSE3 (also known as `Prescott +New Instructions`) was introduced in early 2004 with the latest +(`Prescott`) revision of the Pentium 4. Relatively little was introduced with SSE3, and this library currently makes no use of it. See the various manual pages for details on how to use the library routines. -Copyright 2006, Phil Karn, KA9Q -karn@ka9q.net -http://www.ka9q.net/ + Copyright 2006, Phil Karn, KA9Q + karn@ka9q.net + http://www.ka9q.net/ This software may be used under the terms of the GNU Lesser General Public License (LGPL); see the file lesser.txt for details. -Revision history: -Version 1.0 released 29 May 2001 +##### Revision history: + Version 1.0 released 29 May 2001 -Version 2.0 released 3 Dec 2001: -Restructured to add support for shared libraries. +##### Version 2.0 released 3 Dec 2001: + Restructured to add support for shared libraries. 
-Version 2.0.1 released 8 Dec 2001: -Includes autoconf/configure script +##### Version 2.0.1 released 8 Dec 2001: + Includes autoconf/configure script -Version 2.0.2 released 4 Feb 2002: -Add SIMD version override options -Test for lack of SSE2 mnemonic support in 'as' -Build only selected version +##### Version 2.0.2 released 4 Feb 2002: + Add SIMD version override options + Test for lack of SSE2 mnemonic support in 'as' + Build only selected version -Version 2.0.3 released 6 Feb 2002: -Fix to parityb function in parity.h +##### Version 2.0.3 released 6 Feb 2002: + Fix to parityb function in parity.h -feclib version 1.0 released November 2003 -Merged SIMD-Viterbi, RS and DSP libraries -Changed SIMD Viterbi decoder to detect SSE2/SSE/MMX at runtime rather than build time +##### feclib version 1.0 released November 2003 + Merged SIMD-Viterbi, RS and DSP libraries + Changed SIMD Viterbi decoder to detect SSE2/SSE/MMX at runtime rather than build time -feclib version 2.0 (unreleased) Mar 2004 -General speedups and cleanups -Switch from 4 to 8-bit input symbols on all Viterbi decoders -Support for Altivec on PowerPC -Support for k=15 r=1/6 Cassini/Mars Pathfinder/Mars Exploration Rover/STEREO code -Changed license to GNU Lesser General Public License (LGPL) +##### feclib version 2.0 (unreleased) Mar 2004 + General speedups and cleanups + Switch from 4 to 8-bit input symbols on all Viterbi decoders + Support for Altivec on PowerPC + Support for k=15 r=1/6 Cassini/Mars Pathfinder/Mars Exploration Rover/STEREO code + Changed license to GNU Lesser General Public License (LGPL) -feclib version 2.1 June 5 2006 -Added error checking, fixed alignment bug in SSE2 versions of Viterbi decoders causing segfaults +##### feclib version 2.1 June 5 2006 + Added error checking, fixed alignment bug in SSE2 versions of Viterbi decoders causing segfaults -feclib version 2.1.1 June 6 2006 -Fix test/benchmark time measurement on Linux +##### feclib version 2.1.1 June 6 2006 + Fix 
test/benchmark time measurement on Linux diff --git a/configure b/configure index bc1e224..189cb8c 100755 --- a/configure +++ b/configure @@ -3834,6 +3834,10 @@ darwin*) SH_LIB=libfec.dylib REBIND="" ;; +mingw32*) + SH_LIB=libfec.def + REBIND="" + ;; *) SH_LIB=libfec.so REBIND=ldconfig diff --git a/configure.in b/configure.in index 651d888..db075b3 100644 --- a/configure.in +++ b/configure.in @@ -82,6 +82,10 @@ darwin*) SH_LIB=libfec.dylib REBIND="" ;; +mingw32*) + SH_LIB=libfec.def + REBIND="" + ;; *) SH_LIB=libfec.so REBIND=ldconfig diff --git a/cpu_features.s b/cpu_features.s index ef4ba4e..4a70b6a 100644 --- a/cpu_features.s +++ b/cpu_features.s @@ -1,7 +1,7 @@ .text -.global cpu_features - .type cpu_features,@function -cpu_features: +.global _cpu_features + #.type cpu_features,@function +_cpu_features: pushl %ebx pushl %ecx pushl %edx diff --git a/dotprod_mmx_assist.s b/dotprod_mmx_assist.s index 25deffd..def80a9 100644 --- a/dotprod_mmx_assist.s +++ b/dotprod_mmx_assist.s @@ -13,9 +13,9 @@ # May be used under the terms of the GNU Lesser General Public License (LGPL) .text - .global dotprod_mmx_assist - .type dotprod_mmx_assist,@function -dotprod_mmx_assist: + .global _dotprod_mmx_assist + #.type dotprod_mmx_assist,@function +_dotprod_mmx_assist: pushl %ebp movl %esp,%ebp pushl %esi diff --git a/dotprod_sse2_assist.s b/dotprod_sse2_assist.s index 47348fa..3046d29 100644 --- a/dotprod_sse2_assist.s +++ b/dotprod_sse2_assist.s @@ -13,9 +13,9 @@ # May be used under the terms of the GNU Lesser General Public License (LGPL) .text - .global dotprod_sse2_assist - .type dotprod_sse2_assist,@function -dotprod_sse2_assist: + .global _dotprod_sse2_assist + #.type dotprod_sse2_assist,@function +_dotprod_sse2_assist: pushl %ebp movl %esp,%ebp pushl %esi diff --git a/fec.h b/fec.h index 08e8454..ddde51f 100644 --- a/fec.h +++ b/fec.h @@ -6,6 +6,14 @@ #ifndef _FEC_H_ #define _FEC_H_ +#ifdef __MINGW32__ +#define posix_memalign(memptr, alignment, size) (!(*(memptr) = __mingw_aligned_malloc((size), (alignment)))) /* POSIX-style wrapper: 0 on success, non-zero on failure; release with __mingw_aligned_free() */ 
+#endif + +#ifdef _M_IX86 +#define __i386__ +#endif + /* r=1/2 k=7 convolutional encoder polynomials * The NASA-DSN convention is to use V27POLYA inverted, then V27POLYB * The CCSDS/NASA-GSFC convention is to use V27POLYB, then V27POLYA inverted @@ -264,7 +272,18 @@ void find_cpu_mode(void); /* Call this once at startup to set Cpu_mode */ /* Determine parity of argument: 1 = odd, 0 = even */ #ifdef __i386__ static inline int parityb(unsigned char x){ + #ifdef __GNUC__ __asm__ __volatile__ ("test %1,%1;setpo %0" : "=g"(x) : "r" (x)); + #elif defined(_MSC_VER) + int par = x; + __asm { + mov eax, par + test eax, eax + setpo al ; SETcc only accepts an 8-bit operand + mov par, eax + } + x = (unsigned char) (par & 0xFF); + #endif return x; } #else diff --git a/makefile.in b/makefile.in index a8e424b..b2e6206 100644 --- a/makefile.in +++ b/makefile.in @@ -106,6 +106,10 @@ libfec.dylib: $(LIBS) libfec.so: $(LIBS) $(CC) $(LDFLAGS) -shared -Xlinker -soname=$@ -o $@ -Wl,-whole-archive $^ -Wl,-no-whole-archive -lc +# for Windows (MinGW): link libfec.dll and write its export list to libfec.def +libfec.def: $(LIBS) + $(CC) $(LDFLAGS) -shared -o libfec.dll -Wl,-whole-archive $^ -Wl,-no-whole-archive,--output-def,libfec.def + dotprod.o: dotprod.c fec.h dotprod_port.o: dotprod_port.c fec.h diff --git a/mmxbfly27.s b/mmxbfly27.s index 4abbf48..9007315 100644 --- a/mmxbfly27.s +++ b/mmxbfly27.s @@ -13,11 +13,11 @@ .set OLDMETRICS,132 .set NEWMETRICS,136 .text - .global update_viterbi27_blk_mmx,Mettab27_1,Mettab27_2 - .type update_viterbi27_blk_mmx,@function + .global _update_viterbi27_blk_mmx, _Mettab27_1, _Mettab27_2 + #.type update_viterbi27_blk_mmx,@function .align 16 -update_viterbi27_blk_mmx: +_update_viterbi27_blk_mmx: pushl %ebp movl %esp,%ebp pushl %esi @@ -55,10 +55,10 @@ update_viterbi27_blk_mmx: # each invocation of this macro will do 8 butterflies in parallel .MACRO butterfly GROUP # Compute branch metrics - movq (Mettab27_1+8*\GROUP)(%eax),%mm3 + movq (_Mettab27_1+8*\GROUP)(%eax),%mm3 movq fifteens,%mm0 - paddb (Mettab27_2+8*\GROUP)(%ebx),%mm3 + paddb 
(_Mettab27_2+8*\GROUP)(%ebx),%mm3 paddb ones,%mm3 # emulate pavgb - this may not be necessary psrlq $1,%mm3 pand %mm0,%mm3 diff --git a/mmxbfly29.s b/mmxbfly29.s index e37cab8..5fa6574 100644 --- a/mmxbfly29.s +++ b/mmxbfly29.s @@ -11,14 +11,14 @@ .set OLDMETRICS,516 .set NEWMETRICS,520 .text - .global update_viterbi29_blk_mmx,Mettab29_1,Mettab29_2 - .type update_viterbi29_blk_mmx,@function + .global _update_viterbi29_blk_mmx,_Mettab29_1,_Mettab29_2 + #.type update_viterbi29_blk_mmx,@function .align 16 # MMX (64-bit SIMD) version # requires Pentium-MMX, Pentium-II or better -update_viterbi29_blk_mmx: +_update_viterbi29_blk_mmx: pushl %ebp movl %esp,%ebp pushl %esi @@ -57,9 +57,9 @@ update_viterbi29_blk_mmx: # each invocation of this macro will do 8 butterflies in parallel .MACRO butterfly GROUP # Compute branch metrics - movq (Mettab29_1+8*\GROUP)(%eax),%mm3 + movq (_Mettab29_1+8*\GROUP)(%eax),%mm3 movq fifteens,%mm0 - paddb (Mettab29_2+8*\GROUP)(%ebx),%mm3 + paddb (_Mettab29_2+8*\GROUP)(%ebx),%mm3 paddb ones,%mm3 # emulate pavgb - this may not be necessary psrlq $1,%mm3 pand %mm0,%mm3 diff --git a/peakval_mmx_assist.s b/peakval_mmx_assist.s index 553cb79..0bb5e93 100644 --- a/peakval_mmx_assist.s +++ b/peakval_mmx_assist.s @@ -6,10 +6,10 @@ # Find peak value in signed 16-bit input samples # int peakval_mmx_assist(signed short *in,int cnt); - .global peakval_mmx_assist - .type peakval_mmx_assist,@function + .global _peakval_mmx_assist + #.type peakval_mmx_assist,@function .align 16 -peakval_mmx_assist: +_peakval_mmx_assist: pushl %ebp movl %esp,%ebp pushl %esi diff --git a/peakval_sse2_assist.s b/peakval_sse2_assist.s index c7a58e7..d2e5691 100644 --- a/peakval_sse2_assist.s +++ b/peakval_sse2_assist.s @@ -6,10 +6,10 @@ # Find peak absolute value in signed 16-bit input samples # int peakval_sse2_assist(signed short *in,int cnt); - .global peakval_sse2_assist - .type peakval_sse2_assist,@function + .global _peakval_sse2_assist + #.type peakval_sse2_assist,@function 
.align 16 -peakval_sse2_assist: +_peakval_sse2_assist: pushl %ebp movl %esp,%ebp pushl %esi diff --git a/peakval_sse_assist.s b/peakval_sse_assist.s index 827c800..1bba247 100644 --- a/peakval_sse_assist.s +++ b/peakval_sse_assist.s @@ -6,10 +6,10 @@ # Find peak absolute value in signed 16-bit input samples # int peakval_sse_assist(signed short *in,int cnt); - .global peakval_sse_assist - .type peakval_sse_assist,@function + .global _peakval_sse_assist + #.type peakval_sse_assist,@function .align 16 -peakval_sse_assist: +_peakval_sse_assist: pushl %ebp movl %esp,%ebp pushl %esi diff --git a/sse2bfly27.s b/sse2bfly27.s index 27422a2..1655f11 100644 --- a/sse2bfly27.s +++ b/sse2bfly27.s @@ -13,11 +13,11 @@ .set OLDMETRICS,132 .set NEWMETRICS,136 .text - .global update_viterbi27_blk_sse2,Branchtab27_sse2 - .type update_viterbi27_blk_sse2,@function + .global _update_viterbi27_blk_sse2,_Branchtab27_sse2 + #.type update_viterbi27_blk_sse2,@function .align 16 -update_viterbi27_blk_sse2: +_update_viterbi27_blk_sse2: pushl %ebp movl %esp,%ebp pushl %esi @@ -61,8 +61,8 @@ update_viterbi27_blk_sse2: # each invocation of this macro does 16 butterflies in parallel .MACRO butterfly GROUP # compute branch metrics - movdqa Branchtab27_sse2+(16*\GROUP),%xmm4 - movdqa Branchtab27_sse2+32+(16*\GROUP),%xmm3 + movdqa _Branchtab27_sse2+(16*\GROUP),%xmm4 + movdqa _Branchtab27_sse2+32+(16*\GROUP),%xmm3 pxor %xmm6,%xmm4 pxor %xmm5,%xmm3 diff --git a/sse2bfly29.s b/sse2bfly29.s index 0fa1742..e029dc7 100644 --- a/sse2bfly29.s +++ b/sse2bfly29.s @@ -14,11 +14,11 @@ .set NEWMETRICS,520 .text - .global update_viterbi29_blk_sse2,Branchtab29_sse2 - .type update_viterbi29_blk_sse2,@function + .global _update_viterbi29_blk_sse2,_Branchtab29_sse2 + #.type update_viterbi29_blk_sse2,@function .align 16 -update_viterbi29_blk_sse2: +_update_viterbi29_blk_sse2: pushl %ebp movl %esp,%ebp pushl %esi @@ -63,8 +63,8 @@ update_viterbi29_blk_sse2: # each invocation of this macro does 16 butterflies in 
parallel .MACRO butterfly GROUP # compute branch metrics - movdqa Branchtab29_sse2+(16*\GROUP),%xmm4 - movdqa Branchtab29_sse2+128+(16*\GROUP),%xmm3 + movdqa _Branchtab29_sse2+(16*\GROUP),%xmm4 + movdqa _Branchtab29_sse2+128+(16*\GROUP),%xmm3 pxor %xmm6,%xmm4 pxor %xmm5,%xmm3 pavgb %xmm3,%xmm4 diff --git a/ssebfly27.s b/ssebfly27.s index 7f445da..894d932 100644 --- a/ssebfly27.s +++ b/ssebfly27.s @@ -14,11 +14,11 @@ .set OLDMETRICS,132 .set NEWMETRICS,136 .text -.global update_viterbi27_blk_sse,Branchtab27_sse - .type update_viterbi27_blk_sse,@function +.global _update_viterbi27_blk_sse,_Branchtab27_sse + #.type update_viterbi27_blk_sse,@function .align 16 -update_viterbi27_blk_sse: +_update_viterbi27_blk_sse: pushl %ebp movl %esp,%ebp pushl %esi @@ -60,8 +60,8 @@ update_viterbi27_blk_sse: # each invocation of this macro does 8 butterflies in parallel .MACRO butterfly GROUP # compute branch metrics - movq Branchtab27_sse+(8*\GROUP),%mm4 - movq Branchtab27_sse+32+(8*\GROUP),%mm3 + movq _Branchtab27_sse+(8*\GROUP),%mm4 + movq _Branchtab27_sse+32+(8*\GROUP),%mm3 pxor %mm6,%mm4 pxor %mm5,%mm3 pavgb %mm3,%mm4 # mm4 contains branch metrics diff --git a/ssebfly29.s b/ssebfly29.s index d7d2149..e543f20 100644 --- a/ssebfly29.s +++ b/ssebfly29.s @@ -12,11 +12,11 @@ .set OLDMETRICS,516 .set NEWMETRICS,520 .text - .global update_viterbi29_blk_sse,Branchtab29_sse - .type update_viterbi29_blk_sse,@function + .global _update_viterbi29_blk_sse,_Branchtab29_sse + #.type update_viterbi29_blk_sse,@function .align 16 -update_viterbi29_blk_sse: +_update_viterbi29_blk_sse: pushl %ebp movl %esp,%ebp pushl %esi @@ -58,8 +58,8 @@ update_viterbi29_blk_sse: # each invocation of this macro does 8 butterflies in parallel .MACRO butterfly GROUP # compute branch metrics - movq Branchtab29_sse+(8*\GROUP),%mm4 - movq Branchtab29_sse+128+(8*\GROUP),%mm3 + movq _Branchtab29_sse+(8*\GROUP),%mm4 + movq _Branchtab29_sse+128+(8*\GROUP),%mm3 pxor %mm6,%mm4 pxor %mm5,%mm3 pavgb %mm3,%mm4 # mm4 contains 
branch metrics diff --git a/sumsq_mmx_assist.s b/sumsq_mmx_assist.s index b3bac66..5e56da3 100644 --- a/sumsq_mmx_assist.s +++ b/sumsq_mmx_assist.s @@ -6,10 +6,10 @@ # Evaluate sum of squares of signed 16-bit input samples # long long sumsq_mmx_assist(signed short *in,int cnt); - .global sumsq_mmx_assist - .type sumsq_mmx_assist,@function + .global _sumsq_mmx_assist + #.type sumsq_mmx_assist,@function .align 16 -sumsq_mmx_assist: +_sumsq_mmx_assist: pushl %ebp movl %esp,%ebp pushl %esi @@ -50,9 +50,9 @@ sumsq_mmx_assist: # long sumsq_wd_mmx_assist(signed short *in,int cnt); # Quick version, only safe for small numbers of small input values... - .global sumsq_wd_mmx_assist - .type sumsq_wd_mmx_assist,@function + .global _sumsq_wd_mmx_assist + #.type sumsq_wd_mmx_assist,@function .align 16 -sumsq_wd_mmx_assist: +_sumsq_wd_mmx_assist: pushl %ebp movl %esp,%ebp pushl %esi diff --git a/sumsq_sse2_assist.s b/sumsq_sse2_assist.s index d1c4ee7..b7a4558 100644 --- a/sumsq_sse2_assist.s +++ b/sumsq_sse2_assist.s @@ -5,10 +5,10 @@ .text # Evaluate sum of squares of signed 16-bit input samples # long long sumsq_sse2_assist(signed short *in,int cnt); - .global sumsq_sse2_assist - .type sumsq_sse2_assist,@function + .global _sumsq_sse2_assist + #.type sumsq_sse2_assist,@function .align 16 -sumsq_sse2_assist: +_sumsq_sse2_assist: pushl %ebp movl %esp,%ebp pushl %esi