diff --git a/neural/CPUFeatures.pas b/neural/CPUFeatures.pas
new file mode 100644
index 00000000..666d3902
--- /dev/null
+++ b/neural/CPUFeatures.pas
@@ -0,0 +1,354 @@
+// ###################################################################
+// #### This file is part of the mathematics library project, and is
+// #### offered under the licence agreement described on
+// #### http://www.mrsoft.org/
+// ####
+// #### Copyright:(c) 2011, Michael R. . All rights reserved.
+// ####
+// #### Unless required by applicable law or agreed to in writing, software
+// #### distributed under the License is distributed on an "AS IS" BASIS,
+// #### WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// #### See the License for the specific language governing permissions and
+// #### limitations under the License.
+// ###################################################################
+
+
+unit CPUFeatures;
+
+// unit to determine some cpu features
+
+interface
+
+function IsSSE3Present : boolean;
+function IsAVXPresent : boolean;
+function IsAVX512Present : boolean;
+function IsFMAPresent : boolean;
+function IsHardwareRNDSupport : boolean;
+function IsHardwareRDSeed : boolean;
+
+function GetCurrentProcessorNumber : LongWord; register;
+
+implementation
+
+// ###########################################
+// #### Global constants for features:
+
+
+// base idea from https://stackoverflow.com/questions/6121792/how-to-check-if-a-cpu-supports-the-sse3-instruction-set
+// misc
+var HW_MMX: boolean = False;
+    HW_x64: boolean = False;
+    HW_ABM: boolean = False;           // Advanced Bit Manipulation
+    HW_RDRAND: boolean = False;
+    HW_RDSEED: boolean = False;
+    HW_BMI1: boolean = False;
+    HW_BMI2: boolean = False;
+    HW_ADX: boolean = False;
+    HW_PREFETCHWT1: boolean = False;
+
+    // SIMD: 128-bit
+    HW_SSE: boolean = False;
+    HW_SSE2: boolean = False;
+    HW_SSE3: boolean = False;
+    HW_SSSE3: boolean = False;
+    HW_SSE41: boolean = False;
+    HW_SSE42: boolean = False;
+    HW_SSE4a: boolean = False;
+    HW_AES: boolean = False;
+    HW_SHA: boolean = False;
+
+    // SIMD: 256-bit
+    HW_AVX: boolean = False;
+    HW_XOP: boolean = False;
+    HW_FMA3: boolean = False;
+    HW_FMA4: boolean = False;
+    HW_AVX2: boolean = False;
+
+    // SIMD: 512-bit
+    HW_AVX512F: boolean = False;       // AVX512 Foundation
+    HW_AVX512CD: boolean = False;      // AVX512 Conflict Detection
+    HW_AVX512PF: boolean = False;      // AVX512 Prefetch
+    HW_AVX512ER: boolean = False;      // AVX512 Exponential + Reciprocal
+    HW_AVX512VL: boolean = False;      // AVX512 Vector Length Extensions
+    HW_AVX512BW: boolean = False;      // AVX512 Byte + Word
+    HW_AVX512DQ: boolean = False;      // AVX512 Doubleword + Quadword
+    HW_AVX512IFMA: boolean = False;    // AVX512 Integer 52-bit Fused Multiply-Add
+    HW_AVX512VBMI: boolean = False;    // AVX512 Vector Byte Manipulation Instructions
+
+    AVX_OS_SUPPORT : boolean = False;    // 256bit AVX supported in context switch
+    AVX512_OS_SUPPORT : boolean = False; // 512bit AVX supported in context switch
+
+// ##############################################################
+// #### feature detection code
+// ##############################################################
+
+type
+  TRegisters = record
+    EAX,
+    EBX,
+    ECX,
+    EDX: Cardinal;
+  end;
+
+{$IFDEF FPC} {$ASMMODE intel} {$S-} {$ENDIF}
+
+{$IFDEF CPUX64}
+{$DEFINE x64}
+{$ENDIF}
+{$IFDEF cpux86_64}
+{$DEFINE x64}
+{$ENDIF}
+{$IFDEF x64}
+
+function IsCPUID_Available : boolean;
+begin
+  Result := true;
+end;
+
+procedure GetCPUID(Param: Cardinal; out Registers: TRegisters);
+var iRBX, iRDI : int64;
+{$IFDEF FPC}
+begin
+{$ENDIF}
+asm
+   mov iRBX, rbx;
+   mov iRDI, rdi;
+
+// .pushnv rbx; {save affected registers}
+// .pushnv rdi;
+
+   MOV RDI, Registers
+   MOV EAX, Param;
+   XOR RBX, RBX                  {clear RBX register}
+   XOR RCX, RCX                  {clear RCX register}
+   XOR RDX, RDX                  {clear RDX register}
+   DB $0F, $A2                   {CPUID opcode}
+   MOV TRegisters(RDI).&EAX, EAX {save EAX register}
+   MOV TRegisters(RDI).&EBX, EBX {save EBX register}
+   MOV TRegisters(RDI).&ECX, ECX {save ECX register}
+   MOV TRegisters(RDI).&EDX, EDX {save EDX register}
+
+   // epilog
+   mov rbx, iRBX;
+   mov rdi, iRDI;
+{$IFDEF FPC}
+end;
+{$ENDIF}
+end;
+
+{$ELSE}
+
+function IsCPUID_Available: Boolean; register;
+{$IFDEF FPC} begin {$ENDIF}
+asm
+   PUSHFD                  {save EFLAGS to stack}
+   POP EAX                 {store EFLAGS in EAX}
+   MOV EDX, EAX            {save in EDX for later testing}
+   XOR EAX, $200000;       {flip ID bit in EFLAGS}
+   PUSH EAX                {save new EFLAGS value on stack}
+   POPFD                   {replace current EFLAGS value}
+   PUSHFD                  {get new EFLAGS}
+   POP EAX                 {store new EFLAGS in EAX}
+   XOR EAX, EDX            {check if ID bit changed}
+   JZ @exit                {no, CPUID not available}
+   MOV EAX, True           {yes, CPUID is available}
+@exit:
+end;
+{$IFDEF FPC} end; {$ENDIF}
+
+procedure GetCPUID(Param: Cardinal; var Registers: TRegisters);
+{$IFDEF FPC} begin {$ENDIF}
+asm
+   PUSH EBX                {save affected registers}
+   PUSH EDI
+   MOV EDI, Registers
+   XOR EBX, EBX            {clear EBX register}
+   XOR ECX, ECX            {clear ECX register}
+   XOR EDX, EDX            {clear EDX register}
+   DB $0F, $A2             {CPUID opcode}
+   MOV TRegisters(EDI).&EAX, EAX {save EAX register}
+   MOV TRegisters(EDI).&EBX, EBX {save EBX register}
+   MOV TRegisters(EDI).&ECX, ECX {save ECX register}
+   MOV TRegisters(EDI).&EDX, EDX {save EDX register}
+   POP EDI                 {restore registers}
+   POP EBX
+end;
+{$IFDEF FPC} end; {$ENDIF}
+
+{$ENDIF}
+
+
+// ###########################################
+// #### Local check for AVX support according to
+// #### https://software.intel.com/en-us/blogs/2011/04/14/is-avx-enabled
+// #### and https://software.intel.com/content/www/us/en/develop/articles/how-to-detect-knl-instruction-support.html
+procedure InitAVXOSSupportFlags; {$IFDEF FPC}assembler;{$ENDIF}
+asm
+   {$IFDEF x64}
+   push rbx;
+   {$ELSE}
+   push ebx;
+   {$ENDIF}
+
+   xor eax, eax;
+   cpuid;
+   cmp eax, 1;
+   jb @@endProc;
+
+   mov eax, 1;
+   cpuid;
+
+   and ecx, $018000000;     // check bit 27 (OS uses XSAVE/XRSTOR)
+   cmp ecx, $018000000;     // and bit 28 (AVX supported by CPU)
+   jne @@endProc;
+
+   xor ecx, ecx ;           // XFEATURE_ENABLED_MASK/XCR0 register number = 0
+   db $0F, $01, $D0;        //xgetbv ; // XFEATURE_ENABLED_MASK register is in edx:eax
+   and eax, $E6;            //1110 0110b
+   cmp eax, $E6;            //1110 0110 = zmm_ymm_xmm = (7 << 5) | (1 << 2) | (1 << 1);
+   jne @@not_supported;
+   {$IFDEF x64}
+   mov [rip + AVX512_OS_SUPPORT], 1;
+   {$ELSE}
+   mov AVX512_OS_SUPPORT, 1;
+   {$ENDIF}
+   @@not_supported:
+
+   and eax, $6;             //110b
+   cmp eax, $6;             //110b = check for AVX os support (256bit ymm/xmm state) in a context switch
+   jne @@endProc;
+   {$IFDEF x64}
+   mov [rip + AVX_OS_SUPPORT], 1;
+   {$ELSE}
+   mov AVX_OS_SUPPORT, 1;
+   {$ENDIF}
+
+   @@endProc:
+
+   {$IFDEF x64}
+   pop rbx;
+   {$ELSE}
+   pop ebx;
+   {$ENDIF}
+end;
+
+function GetCurrentProcessorNumber : LongWord; register; // stdcall; external 'Kernel32.dll';
+{$IFDEF FPC}
+begin
+{$ENDIF}
+asm
+   mov eax, 1;
+   DB $0F, $A2;  //cpuid;
+   shr ebx, 24;  // initial APIC id is reported in EBX[31:24]
+   mov eax, ebx;
+{$IFDEF FPC}
+end;
+{$ENDIF}
+end;
+
+procedure InitFlags;
+var nIds : LongWord;
+    reg : TRegisters;
+begin
+  if IsCPUID_Available then
+  begin
+    GetCPUID(0, reg);
+    nIds := reg.EAX;
+
+    if nIds >= 1 then
+    begin
+      GetCPUID(1, reg);
+
+      HW_MMX  := (reg.EDX and (1 shl 23)) <> 0;
+      HW_SSE  := (reg.EDX and (1 shl 25)) <> 0;
+      HW_SSE2 := (reg.EDX and (1 shl 26)) <> 0;
+      HW_SSE3 := (reg.ECX and (1 shl 0)) <> 0;  // SSE3 is CPUID.1:ECX bit 0 (EDX bit 0 is the FPU flag)
+
+      HW_SSSE3 := (reg.ECX and (1 shl 9)) <> 0;
+      HW_SSE41 := (reg.ECX and (1 shl 19)) <> 0;
+      HW_SSE42 := (reg.ECX and (1 shl 20)) <> 0;
+      HW_AES   := (reg.ECX and (1 shl 25)) <> 0;
+
+      HW_AVX  := (reg.ECX and (1 shl 28)) <> 0;
+      HW_FMA3 := (reg.ECX and (1 shl 12)) <> 0;
+
+      HW_RDRAND := (reg.ECX and (1 shl 30)) <> 0;
+    end;
+
+    if nIds >= 7 then
+    begin
+      GetCPUID($7, reg);  // leaf 7, subleaf 0 (GetCPUID clears ECX)
+      HW_AVX2 := (reg.EBX and (1 shl 5)) <> 0;
+
+      HW_BMI1 := (reg.EBX and (1 shl 3)) <> 0;
+      HW_BMI2 := (reg.EBX and (1 shl 8)) <> 0;
+      HW_ADX  := (reg.EBX and (1 shl 19)) <> 0;
+      HW_SHA  := (reg.EBX and (1 shl 29)) <> 0;
+      HW_PREFETCHWT1 := (reg.ECX and (1 shl 0)) <> 0;  // PREFETCHWT1 is CPUID.7:ECX bit 0
+      HW_RDSEED := (reg.EBX and (1 shl 18)) <> 0;
+
+      HW_AVX512F    := (reg.EBX and (1 shl 16)) <> 0;
+      HW_AVX512CD   := (reg.EBX and (1 shl 28)) <> 0;
+      HW_AVX512PF   := (reg.EBX and (1 shl 26)) <> 0;
+      HW_AVX512ER   := (reg.EBX and (1 shl 27)) <> 0;
+      HW_AVX512VL   := (reg.EBX and (1 shl 31)) <> 0;
+      HW_AVX512BW   := (reg.EBX and (1 shl 30)) <> 0;
+      HW_AVX512DQ   := (reg.EBX and (1 shl 17)) <> 0;
+      HW_AVX512IFMA := (reg.EBX and (1 shl 21)) <> 0;
+      HW_AVX512VBMI := (reg.ECX and (1 shl 1)) <> 0;
+    end;
+
+    GetCPUID($80000000, reg);
+
+    if reg.EAX >= $80000001 then
+    begin
+      GetCPUID($80000001, reg);
+
+      HW_x64   := (reg.EDX and (1 shl 29)) <> 0;
+      HW_ABM   := (reg.ECX and (1 shl 5)) <> 0;
+      HW_SSE4a := (reg.ECX and (1 shl 6)) <> 0;
+      HW_FMA4  := (reg.ECX and (1 shl 16)) <> 0;
+      HW_XOP   := (reg.ECX and (1 shl 11)) <> 0;
+    end;
+
+    // now check the os support
+    if (HW_AVX) or (HW_AVX2) then
+       InitAVXOSSupportFlags;
+  end;
+end;
+
+function IsSSE3Present : boolean;
+begin
+  Result := HW_SSE3;
+end;
+
+function IsAVXPresent : boolean;
+begin
+  Result := HW_AVX2 and AVX_OS_SUPPORT;  // deliberately requires AVX2, not just AVX
+end;
+
+function IsAVX512Present : boolean;
+begin
+  Result := HW_AVX512F and AVX512_OS_SUPPORT;
+end;
+
+function IsFMAPresent : boolean;
+begin
+  Result := AVX_OS_SUPPORT and HW_FMA3;
+end;
+
+function IsHardwareRNDSupport : boolean;
+begin
+  Result := HW_RDRAND;
+end;
+
+function IsHardwareRDSeed : boolean;
+begin
+  Result := HW_RDSEED;
+end;
+
+initialization
+  InitFlags;
+
+end.
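Note (editor, not part of the patch): a minimal sketch of how these feature queries are meant to gate the SIMD paths added in the units below. AVXDotProd is the routine introduced in Neural.AVX / Neural.AVXx64; ScalarDotProd is a hypothetical plain-Pascal fallback, not defined in this patch.

uses CPUFeatures {$IFDEF CPUX64}, Neural.AVXx64 {$ELSE}, Neural.AVX {$ENDIF};

function DotProd(x, y: PSingle; N: integer): single;
begin
  // IsAVXPresent is only true when the CPU reports AVX2 *and* the OS
  // saves/restores the ymm state across context switches (XGETBV),
  // so it is safe to gate the AVX code path on this single check.
  if IsAVXPresent then
    Result := AVXDotProd(x, y, N)
  else
    Result := ScalarDotProd(x, y, N); // hypothetical fallback
end;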
diff --git a/neural/Neural.AVX.pas b/neural/Neural.AVX.pas
new file mode 100644
index 00000000..c121e30f
--- /dev/null
+++ b/neural/Neural.AVX.pas
@@ -0,0 +1,112 @@
+unit Neural.AVX;
+
+// ###########################################
+// #### 32 bit intel avx functions
+// ###########################################
+
+interface
+
+{$IFDEF CPUX64}
+{$DEFINE x64}
+{$ENDIF}
+{$IFDEF cpux86_64}
+{$DEFINE x64}
+{$ENDIF}
+{$IFNDEF x64}
+
+function AVXDotProd( x : PSingle; y : PSingle; N : integer ) : single; {$IFDEF FPC} assembler; {$ELSE} register; {$ENDIF}
+
+{$ENDIF}
+
+implementation
+
+{$IFNDEF x64}
+
+{$IFDEF FPC} {$ASMMODE intel} {$S-} {$ENDIF}
+
+function AVXDotProd( x : PSingle; y : PSingle; N : integer ) : single;
+// eax = x, edx = y, ecx = N
+asm
+   // iteration counter: ecx := -(4*N) bytes, counted up towards zero
+   imul ecx, -4;
+
+   // adjust the x and y pointers so that [reg + ecx] addresses the arrays
+   sub eax, ecx;
+   sub edx, ecx;
+
+   {$IFDEF FPC}vxorpd ymm0, ymm0, ymm0;{$ELSE}db $C5,$FD,$57,$C0;{$ENDIF}
+
+   // unrolled loop: 32 elements (4 x 8 singles) per iteration
+   @Loop1:
+      add ecx, 128;
+      jg @loopEnd1;
+
+      {$IFDEF FPC}vmovupd ymm1, [eax + ecx - 128];{$ELSE}db $C5,$FD,$10,$4C,$08,$80;{$ENDIF}
+      {$IFDEF FPC}vmovupd ymm2, [edx + ecx - 128];{$ELSE}db $C5,$FD,$10,$54,$0A,$80;{$ENDIF}
+      {$IFDEF FPC}vmulps ymm1, ymm1, ymm2;{$ELSE}db $C5,$F4,$59,$CA;{$ENDIF}
+      {$IFDEF FPC}vaddps ymm0, ymm0, ymm1;{$ELSE}db $C5,$FC,$58,$C1;{$ENDIF}
+
+      {$IFDEF FPC}vmovupd ymm3, [eax + ecx - 96];{$ELSE}db $C5,$FD,$10,$5C,$08,$A0;{$ENDIF}
+      {$IFDEF FPC}vmovupd ymm4, [edx + ecx - 96];{$ELSE}db $C5,$FD,$10,$64,$0A,$A0;{$ENDIF}
+      {$IFDEF FPC}vmulps ymm3, ymm3, ymm4;{$ELSE}db $C5,$E4,$59,$DC;{$ENDIF}
+      {$IFDEF FPC}vaddps ymm0, ymm0, ymm3;{$ELSE}db $C5,$FC,$58,$C3;{$ENDIF}
+
+      {$IFDEF FPC}vmovupd ymm1, [eax + ecx - 64];{$ELSE}db $C5,$FD,$10,$4C,$08,$C0;{$ENDIF}
+      {$IFDEF FPC}vmovupd ymm2, [edx + ecx - 64];{$ELSE}db $C5,$FD,$10,$54,$0A,$C0;{$ENDIF}
+      {$IFDEF FPC}vmulps ymm1, ymm1, ymm2;{$ELSE}db $C5,$F4,$59,$CA;{$ENDIF}
+      {$IFDEF FPC}vaddps ymm0, ymm0, ymm1;{$ELSE}db $C5,$FC,$58,$C1;{$ENDIF}
+
+      {$IFDEF FPC}vmovupd ymm3, [eax + ecx - 32];{$ELSE}db $C5,$FD,$10,$5C,$08,$E0;{$ENDIF}
+      {$IFDEF FPC}vmovupd ymm4, [edx + ecx - 32];{$ELSE}db $C5,$FD,$10,$64,$0A,$E0;{$ENDIF}
+      {$IFDEF FPC}vmulps ymm3, ymm3, ymm4;{$ELSE}db $C5,$E4,$59,$DC;{$ENDIF}
+      {$IFDEF FPC}vaddps ymm0, ymm0, ymm3;{$ELSE}db $C5,$FC,$58,$C3;{$ENDIF}
+
+   jmp @Loop1;
+
+   @loopEnd1:
+
+   {$IFDEF FPC}vextractf128 xmm2, ymm0, 1;{$ELSE}db $C4,$E3,$7D,$19,$C2,$01;{$ENDIF}
+   {$IFDEF FPC}vhaddps xmm0, xmm0, xmm2;{$ELSE}db $C5,$FB,$7C,$C2;{$ENDIF}
+
+   sub ecx, 128;
+   jz @loop2End;
+
+   // process the remaining elements in blocks of 4
+   @Loop2:
+      add ecx, 16;
+      jg @Loop2End;
+
+      {$IFDEF FPC}vmovupd xmm3, [eax + ecx - 16];{$ELSE}db $C5,$F9,$10,$5C,$08,$F0;{$ENDIF}
+      {$IFDEF FPC}vmovupd xmm4, [edx + ecx - 16];{$ELSE}db $C5,$F9,$10,$64,$0A,$F0;{$ENDIF}
+      {$IFDEF FPC}vmulps xmm3, xmm3, xmm4;{$ELSE}db $C5,$E0,$59,$DC;{$ENDIF}
+      {$IFDEF FPC}vaddps xmm0, xmm0, xmm3;{$ELSE}db $C5,$F8,$58,$C3;{$ENDIF}
+   jmp @Loop2;
+
+   @Loop2End:
+
+   // handle the last 1..3 elements one at a time
+   sub ecx, 16;
+   jz @loop3End;
+
+   @loop3:
+      add ecx, 4;
+      jg @loop3End;
+
+      {$IFDEF FPC}vmovss xmm3, [eax + ecx - 4];{$ELSE}db $C5,$FA,$10,$5C,$08,$FC;{$ENDIF}
+      {$IFDEF FPC}vmovss xmm4, [edx + ecx - 4];{$ELSE}db $C5,$FA,$10,$64,$0A,$FC;{$ENDIF}
+      {$IFDEF FPC}vmulss xmm3, xmm3, xmm4;{$ELSE}db $C5,$E2,$59,$DC;{$ENDIF}
+      {$IFDEF FPC}vaddss xmm0, xmm0, xmm3;{$ELSE}db $C5,$FA,$58,$C3;{$ENDIF}
+
+   jmp @loop3;
+   @loop3End:
+
+   // build result: horizontal sum of the four partial sums
+   {$IFDEF FPC}vhaddps xmm0, xmm0, xmm0;{$ELSE}db $C5,$FB,$7C,$C0;{$ENDIF}
+   {$IFDEF FPC}vhaddps xmm0, xmm0, xmm0;{$ELSE}db $C5,$FB,$7C,$C0;{$ENDIF}
+   {$IFDEF FPC}vzeroupper;{$ELSE}db $C5,$F8,$77;{$ENDIF}
+   movss Result, xmm0;
+end;
+
+{$ENDIF}
+
+end.
diff --git a/neural/Neural.AVXx64.pas b/neural/Neural.AVXx64.pas
new file mode 100644
index 00000000..f4c1c870
--- /dev/null
+++ b/neural/Neural.AVXx64.pas
@@ -0,0 +1,120 @@
+unit Neural.AVXx64;
+
+// ###########################################
+// #### 64 bit intel avx functions
+// ###########################################
+
+interface
+
+{$IFDEF CPUX64}
+{$DEFINE x64}
+{$ENDIF}
+{$IFDEF cpux86_64}
+{$DEFINE x64}
+{$ENDIF}
+{$IFDEF x64}
+
+function AVXDotProd( x : PSingle; y : PSingle; N : integer ) : single; {$IFDEF FPC}assembler;{$ENDIF}
+
+{$ENDIF}
+
+implementation
+
+{$IFDEF x64}
+
+{$IFDEF FPC} {$ASMMODE intel} {$S-} {$ENDIF}
+
+function AVXDotProd( x : PSingle; y : PSingle; N : integer ) : single;
+asm
+   {$IFDEF UNIX}
+   // Linux uses a different ABI -> copy the registers so they match the Win64 convention.
+   // The parameters are passed in the following order:
+   // RDI, RSI, RDX, RCX, r8, r9 -> mov to RCX, RDX, R8, r9
+   mov r8, rdx;
+   mov rdx, rsi;
+   mov rcx, rdi;
+   {$ENDIF}
+
+   // iteration counter: r8 := -(4*N) bytes, counted up towards zero
+   imul r8, -4;
+
+   // adjust the x and y pointers so that [reg + r8] addresses the arrays
+   sub rcx, r8;
+   sub rdx, r8;
+
+   {$IFDEF FPC}vxorpd ymm0, ymm0, ymm0;{$ELSE}db $C5,$FD,$57,$C0;{$ENDIF}
+
+   // unrolled loop: 32 elements (4 x 8 singles) per iteration
+   @Loop1:
+      add r8, 128;
+      jg @loopEnd1;
+
+      {$IFDEF FPC}vmovupd ymm1, [rcx + r8 - 128];{$ELSE}db $C4,$A1,$7D,$10,$4C,$01,$80;{$ENDIF}
+      {$IFDEF FPC}vmovupd ymm2, [rdx + r8 - 128];{$ELSE}db $C4,$A1,$7D,$10,$54,$02,$80;{$ENDIF}
+      {$IFDEF FPC}vmulps ymm1, ymm1, ymm2;{$ELSE}db $C5,$F4,$59,$CA;{$ENDIF}
+      {$IFDEF FPC}vaddps ymm0, ymm0, ymm1;{$ELSE}db $C5,$FC,$58,$C1;{$ENDIF}
+
+      {$IFDEF FPC}vmovupd ymm3, [rcx + r8 - 96];{$ELSE}db $C4,$A1,$7D,$10,$5C,$01,$A0;{$ENDIF}
+      {$IFDEF FPC}vmovupd ymm4, [rdx + r8 - 96];{$ELSE}db $C4,$A1,$7D,$10,$64,$02,$A0;{$ENDIF}
+      {$IFDEF FPC}vmulps ymm3, ymm3, ymm4;{$ELSE}db $C5,$E4,$59,$DC;{$ENDIF}
+      {$IFDEF FPC}vaddps ymm0, ymm0, ymm3;{$ELSE}db $C5,$FC,$58,$C3;{$ENDIF}
+
+      {$IFDEF FPC}vmovupd ymm1, [rcx + r8 - 64];{$ELSE}db $C4,$A1,$7D,$10,$4C,$01,$C0;{$ENDIF}
+      {$IFDEF FPC}vmovupd ymm2, [rdx + r8 - 64];{$ELSE}db $C4,$A1,$7D,$10,$54,$02,$C0;{$ENDIF}
+      {$IFDEF FPC}vmulps ymm1, ymm1, ymm2;{$ELSE}db $C5,$F4,$59,$CA;{$ENDIF}
+      {$IFDEF FPC}vaddps ymm0, ymm0, ymm1;{$ELSE}db $C5,$FC,$58,$C1;{$ENDIF}
+
+      {$IFDEF FPC}vmovupd ymm3, [rcx + r8 - 32];{$ELSE}db $C4,$A1,$7D,$10,$5C,$01,$E0;{$ENDIF}
+      {$IFDEF FPC}vmovupd ymm4, [rdx + r8 - 32];{$ELSE}db $C4,$A1,$7D,$10,$64,$02,$E0;{$ENDIF}
+      {$IFDEF FPC}vmulps ymm3, ymm3, ymm4;{$ELSE}db $C5,$E4,$59,$DC;{$ENDIF}
+      {$IFDEF FPC}vaddps ymm0, ymm0, ymm3;{$ELSE}db $C5,$FC,$58,$C3;{$ENDIF}
+
+   jmp @Loop1;
+
+   @loopEnd1:
+
+   {$IFDEF FPC}vextractf128 xmm2, ymm0, 1;{$ELSE}db $C4,$E3,$7D,$19,$C2,$01;{$ENDIF}
+   {$IFDEF FPC}vhaddps xmm0, xmm0, xmm2;{$ELSE}db $C5,$FB,$7C,$C2;{$ENDIF}
+
+   sub r8, 128;
+   jz @loop2End;
+
+   // process the remaining elements in blocks of 4
+   @Loop2:
+      add r8, 16;
+      jg @Loop2End;
+
+      {$IFDEF FPC}vmovupd xmm3, [rcx + r8 - 16];{$ELSE}db $C4,$A1,$79,$10,$5C,$01,$F0;{$ENDIF}
+      {$IFDEF FPC}vmovupd xmm4, [rdx + r8 - 16];{$ELSE}db $C4,$A1,$79,$10,$64,$02,$F0;{$ENDIF}
+      {$IFDEF FPC}vmulps xmm3, xmm3, xmm4;{$ELSE}db $C5,$E0,$59,$DC;{$ENDIF}
+      {$IFDEF FPC}vaddps xmm0, xmm0, xmm3;{$ELSE}db $C5,$F8,$58,$C3;{$ENDIF}
+   jmp @Loop2;
+
+   @Loop2End:
+
+   // handle the last 1..3 elements one at a time
+   sub r8, 16;
+   jz @loop3End;
+
+   @loop3:
+      add r8, 4;
+      jg @loop3End;
+
+      {$IFDEF FPC}vmovss xmm3, [rcx + r8 - 4];{$ELSE}db 
$C4,$A1,$7A,$10,$5C,$01,$FC;{$ENDIF} + {$IFDEF FPC}vmovss xmm4, [rdx + r8 - 4];{$ELSE}db $C4,$A1,$7A,$10,$64,$02,$FC;{$ENDIF} + {$IFDEF FPC}vmulss xmm3, xmm3, xmm4;{$ELSE}db $C5,$E2,$59,$DC;{$ENDIF} + {$IFDEF FPC}vaddss xmm0, xmm0, xmm3;{$ELSE}db $C5,$FA,$58,$C3;{$ENDIF} + + jmp @loop3; + @loop3End: + + // build result + {$IFDEF FPC}vhaddps xmm0, xmm0, xmm0;{$ELSE}db $C5,$FB,$7C,$C0;{$ENDIF} + {$IFDEF FPC}vhaddps xmm0, xmm0, xmm0;{$ELSE}db $C5,$FB,$7C,$C0;{$ENDIF} + {$IFDEF FPC}vzeroupper;{$ELSE}db $C5,$F8,$77;{$ENDIF} + movss Result, xmm0; +end; + +{$ENDIF} + +end. diff --git a/neural/neuralabfun.pas b/neural/neuralabfun.pas index 07702375..18e31255 100644 --- a/neural/neuralabfun.pas +++ b/neural/neuralabfun.pas @@ -106,7 +106,7 @@ function CreateValidBinaryTest(Val1, Val2: byte; pOp1, pOp2: integer; RelOp1, Re // this array maps OpCode into its string representation const - csStrOp: array[0..csMaxOperations - 1] of string[15] = + csStrOp: array[0..csMaxOperations - 1] of string = //0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 ('nop', '=', '=', '<>', '>', '<', 'V', ' := ', 'inc', 'dec', '+', '-', '*', 'div', 'mod', //15 16 17 18 19 @@ -699,7 +699,7 @@ procedure TCreateValidOperations.Create(Tests, FullEqual: boolean; var ERRORS: array of byte); var LocalNonZeroPrevStates, NonZeroErrors: TPositionArray; - LocalNonZeroPrevStatesCount, NonZeroErrorsCount: integer; + LocalNonZeroPrevStatesCount(*, NonZeroErrorsCount*): integer; LocalNumberOfPreviousStates: integer; LocalPreviousStates: array of byte; OnAction: boolean; @@ -866,7 +866,7 @@ procedure TCreateValidOperations.Create(Tests, FullEqual: boolean; Clear; SetLength(NonZeroErrors,Self.NumberOfCurrentStates); NumberOfErrors := NumberOfNextStates; - NonZeroErrorsCount := getNonZeroElementsPos(NumberOfErrors, ERRORS, NonZeroErrors); + (*NonZeroErrorsCount := *)getNonZeroElementsPos(NumberOfErrors, ERRORS, NonZeroErrors); if not(FCS.TestOnStates) then RunOnActionFlag := 1 else if not(FCS.TestOnActions) then RunOnActionFlag := 0 @@ -1012,6 +1012,7 @@ function TRunOperation.LocalTestTests(var Tests: TTestsClass): integer; efeito: byte; PermissibleErrors: ShortInt; begin + PermissibleErrors := 0; if Tests.N > 0 then begin PermissibleErrors := Tests.N - Tests.TestThreshold; diff --git a/neural/neuralbit.pas b/neural/neuralbit.pas index d2a1b309..6199ee47 100644 --- a/neural/neuralbit.pas +++ b/neural/neuralbit.pas @@ -925,4 +925,4 @@ procedure Clear(var VARS: array of extended); VARS[Cont] := 0; end; -end. { of unit } +end. diff --git a/neural/neuralbyteprediction.pas b/neural/neuralbyteprediction.pas index 6da031f0..73ccd061 100644 --- a/neural/neuralbyteprediction.pas +++ b/neural/neuralbyteprediction.pas @@ -258,6 +258,7 @@ interface // This function returns the probability to win of a given neuron from position pos. function ProbToWin(neuronPos: longint): extended; + public // This function returns all relation indexes with a minimun number of victories (selections) // and a minimum probability MinF. 
procedure SelectBestIndexes(MinimumNumberOfVictories: longint; MinF: extended); @@ -371,6 +372,7 @@ function TClassifier.AddState(pLabel: integer; pState: array of byte): integer; CurrentState[0] := 0; NextState[0] := pLabel; FStates[FNextFreePos].FTester.Load(FCS, pState, CurrentState, NextState); + Result := FNextFreePos; Inc(FNextFreePos); end; @@ -485,7 +487,7 @@ function TClassifier.MutateNeuronGroup(NG: TNeuronGroup): TNeuronGroup; if (NG.TestNeuronLayer.N > 10) then begin NG.TestNeuronLayer.TestThreshold := - NG.TestNeuronLayer.N - Random(NG.TestNeuronLayer.N div 10); + NG.TestNeuronLayer.N - Random(Integer(NG.TestNeuronLayer.N div 10)); end else begin @@ -718,10 +720,11 @@ procedure TEasyLearnAndPredictClass.Predict(var pActions, pCurrentState: array o begin ABCopy(aActions, pActions); ABCopy(aCurrentState, pCurrentState); + idxCache := -1; if FUseCache then idxCache := FCache.Read(pActions, pPredictedState); Equal := ABCmp(pActions, pCurrentState); - if FUseCache and (idxCache <> -1) and Equal then + if (idxCache <> -1) and Equal then begin FCached := True; end @@ -866,28 +869,37 @@ procedure TStatePredictionClass.AddNeuronsFromStringFromPos(var str: string; pos pStatelen: integer; inputNeuronCnt: integer; begin - version := 1; S := TStringList.Create; - S.Sorted := false; - S.Delimiter := chr(10); - S.StrictDelimiter := true; - S.DelimitedText := str; - - version := StrToInt(S[0]); - evaluation := StrToInt(S[1]); - pActionLen := StrToInt(S[2]); - pStatelen := StrToInt(S[3]); - - //TODO: treat above info here. - - if (S.Count>4) then - begin - neuronPos := pos; - for inputNeuronCnt := 4 to S.Count-1 do - begin - FNN[neuronPos].LoadFromString(S[inputNeuronCnt]); - inc(neuronPos); - end; + try + S.Sorted := false; + S.Delimiter := chr(10); + S.StrictDelimiter := true; + S.DelimitedText := str; + + version := StrToInt(S[0]); + evaluation := StrToInt(S[1]); + pActionLen := StrToInt(S[2]); + pStatelen := StrToInt(S[3]); + + if version <> 1 then + raise Exception.Create( 'Version V' + IntToStr( version ) + '.' + IntToStr(evaluation) + ' found but V1.0 expected'); + + if pActionLen <> FActionByteLen then + raise Exception.Create('Action length differs'); + if pStateLen <> FStateByteLen then + raise Exception.Create('State length differs'); + + if (S.Count>4) then + begin + neuronPos := pos; + for inputNeuronCnt := 4 to S.Count-1 do + begin + FNN[neuronPos].LoadFromString(S[inputNeuronCnt]); + inc(neuronPos); + end; + end; + finally + S.Free; end; end; @@ -1057,11 +1069,11 @@ function TStatePredictionClass.GetBestNeuronIndex(var posBest: longint; if (Actual > Best) then begin - Best := actual; + // Best := actual; // this is due to the exit... 
posBest := neuronPos; R := True; Result := R; - exit; + exit; // end; end; end; diff --git a/neural/neuraldatasets.pas b/neural/neuraldatasets.pas index 0ac9a79f..ee561449 100644 --- a/neural/neuraldatasets.pas +++ b/neural/neuraldatasets.pas @@ -27,7 +27,7 @@ interface uses - {$IFNDEF FPC}System.Classes,{$ENDIF} + {$IFNDEF FPC} {$IF (CompilerVersion <= 21)} Classes, {$ELSE} System.Classes, {$IFEND} {$ENDIF} neuralvolume, neuralnetwork {$IFDEF FPC}, FPimage, FPReadBMP, FPReadPCX, FPReadJPEG, FPReadPNG, @@ -277,7 +277,7 @@ implementation uses SysUtils, math, neuralthread, - {$IFDEF FPC}fileutil{$ELSE} Winapi.Windows{$ENDIF}; + {$IFDEF FPC}fileutil{$ELSE} Windows{$ENDIF}; {$IFDEF FPC} procedure CreateVolumesFromImagesFromFolder(out ImgTrainingVolumes, ImgValidationVolumes, @@ -822,12 +822,13 @@ function SwapEndian(I:integer):integer; {$ENDIF} procedure TranslateCifar10VolumesToMachineAnimal(VolumeList: TNNetVolumeList); -var - Volume: TNNetVolume; +var i : integer; + Volume: TNNetVolume; begin - for Volume in VolumeList do + for i := 0 to VolumeList.Count - 1 do begin - Volume.Tag := csMachineAnimalCifar10Pos[Volume.Tag]; + volume := volumeList[i]; + Volume.Tag := csMachineAnimalCifar10Pos[Volume.Tag]; end; end; diff --git a/neural/neuralevolutionary.pas b/neural/neuralevolutionary.pas index 84b2c803..64344fc0 100644 --- a/neural/neuralevolutionary.pas +++ b/neural/neuralevolutionary.pas @@ -85,7 +85,6 @@ TEvolutionary = class(TObject) function Evolve(Element: T; RunCnt: integer): T; constructor Create(pAlwaysGetBest: boolean = False; pKidsPerFather: integer = 10); destructor Destroy; override; - published property AlwaysGetBest: boolean read FAlwaysGetBest write FAlwaysGetBest; property KidsPerFather: integer read FKidsPerFather write FKidsPerFather; property LastEval: double read FLastEval; diff --git a/neural/neuralfit.pas b/neural/neuralfit.pas index 588aac91..ae12acb0 100644 --- a/neural/neuralfit.pas +++ b/neural/neuralfit.pas @@ -94,7 +94,7 @@ TNeuralFitBase = class(TMObject) FProcs: TNeuralThreadList; procedure CheckLearningRate(iEpochCount: integer); public - constructor Create(); override; + constructor Create(); destructor Destroy(); override; procedure WaitUntilFinished; {$IFDEF OpenCL} @@ -119,6 +119,7 @@ TNeuralFitBase = class(TMObject) property InitialEpoch: integer read FInitialEpoch write FInitialEpoch; property InitialLearningRate: single read FInitialLearningRate write FInitialLearningRate; property LearningRateDecay: single read FLearningRateDecay write FLearningRateDecay; + property MinLearnRate : single read FMinLearnRate write FMinLearnRate; property LoadBestAtEnd: boolean read FLoadBestAdEnd write FLoadBestAdEnd; property L2Decay: single read FL2Decay write FL2Decay; property MaxThreadNum: integer read FMaxThreadNum write FMaxThreadNum; @@ -152,7 +153,7 @@ TNeuralFitWithImageBase = class(TNeuralFitBase) FColorEncoding: integer; FChannelShiftRate: TNeuralFloat; public - constructor Create(); override; + constructor Create(); destructor Destroy(); override; procedure ClassifyImage(pNN: TNNet; pImgInput, pOutput: TNNetVolume); procedure EnableDefaultImageTreatment(); virtual; @@ -187,7 +188,7 @@ TNeuralDataLoadingFit = class(TNeuralFitWithImageBase) FGetTrainingProc, FGetValidationProc, FGetTestProc: TNNetGet2VolumesProc; function DefaultLossFn(ExpectedOutput, FoundOutput: TNNetVolume; ThreadId: integer): TNeuralFloat; public - constructor Create(); override; + constructor Create(); procedure FitLoading(pNN: TNNet; TrainingCnt, ValidationCnt, TestCnt, pBatchSize, Epochs: 
integer; pGetTrainingPair, pGetValidationPair, pGetTestPair: TNNetGetPairFn); overload; @@ -231,7 +232,7 @@ TNeuralFit = class(TNeuralDataLoadingFit) function FitValidationPair(Idx: integer; ThreadId: integer): TNNetVolumePair; function FitTestPair(Idx: integer; ThreadId: integer): TNNetVolumePair; public - constructor Create(); override; + constructor Create(); destructor Destroy(); override; procedure Fit(pNN: TNNet; @@ -280,7 +281,7 @@ TNeuralImageFit = class(TNeuralFitWithImageBase) FIsSoftmax: boolean; FTrainingSampleProcessedCnt: TNNetVolume; public - constructor Create(); override; + constructor Create(); destructor Destroy(); override; procedure Fit(pNN: TNNet; @@ -602,7 +603,7 @@ procedure TNeuralDataLoadingFit.FitLoading(pNN: TNNet; TrainingCnt, ' Inertia:' + FloatToStrF(FInertia,ffFixed,8,6) + ' Batch size:' + IntToStr(FBatchSize) + ' Step size:' + IntToStr(FStepSize) + - ' Staircase ephocs:' + IntToStr(FStaircaseEpochs) + ' Staircase epochs:' + IntToStr(FStaircaseEpochs) ); if TrainingCnt > 0 then MessageProc('Training volumes: '+IntToStr(TrainingCnt)); if ValidationCnt > 0 then MessageProc('Validation volumes: '+IntToStr(ValidationCnt)); @@ -1370,6 +1371,7 @@ procedure TNeuralDataLoadingFit.RunTrainingBatch(); if FClipDelta > 0 then begin MaxDelta := FNN.ForceMaxAbsoluteDelta(FClipDelta); + MessageProc('Deltas have maxed to: '+FloatToStr(MaxDelta)); end else begin @@ -1749,7 +1751,7 @@ procedure TNeuralImageFit.Fit(pNN: TNNet; ' Inertia:' + FloatToStrF(FInertia,ffFixed,8,6) + ' Batch size:' + IntToStr(FBatchSize) + ' Step size:' + IntToStr(FStepSize) + - ' Staircase ephocs:' + IntToStr(FStaircaseEpochs) + + ' Staircase epochs:' + IntToStr(FStaircaseEpochs) + ' Min backprop error:' + FloatToStrF(MinBackpropagationError,ffFixed,4,2) ); if Assigned(FImgVolumes) then MessageProc('Training images: '+IntToStr(FImgVolumes.Count)); @@ -1796,6 +1798,7 @@ procedure TNeuralImageFit.Fit(pNN: TNNet; if FClipDelta > 0 then begin MaxDelta := FNN.ForceMaxAbsoluteDelta(FClipDelta); + MessageProc('Deltas have maxed to: '+FloatToStr(MaxDelta)); end else begin diff --git a/neural/neuralnetwork.pas b/neural/neuralnetwork.pas index 960f86f0..dbb81a69 100644 --- a/neural/neuralnetwork.pas +++ b/neural/neuralnetwork.pas @@ -102,10 +102,10 @@ TNNetNeuron = class (TMObject) FBiasWeight: TNeuralFloat; FBiasInertia: TNeuralFloat; FBiasDelta: TNeuralFloat; - public - constructor Create(); override; + constructor Create(); destructor Destroy(); override; + procedure Assign( neuron : TNNetNeuron ); procedure Fill(Value:TNeuralFloat); {$IFDEF Release} inline; {$ENDIF} procedure AddInertia(); {$IFDEF Release} inline; {$ENDIF} procedure UpdateWeights(Inertia:TNeuralFloat); {$IFDEF Release} inline; {$ENDIF} @@ -197,8 +197,10 @@ TNNetLayer = class(TMObject) procedure ApplyActivationFunctionToOutput(); virtual; procedure BuildArrNeurons(); procedure AfterWeightUpdate(); virtual; + protected + procedure Assign( layer : TNNetLayer ); public - constructor Create(); override; + constructor Create(); virtual; destructor Destroy(); override; {$IFDEF OpenCL} @@ -247,7 +249,7 @@ TNNetLayer = class(TMObject) procedure ForcePositiveWeights(); {$IFDEF Release} inline; {$ENDIF} procedure NormalizeWeights(VMax: TNeuralFloat); {$IFDEF Release} inline; {$ENDIF} function SaveDataToString(): string; virtual; - procedure LoadDataFromString(strData: string); virtual; + procedure LoadDataFromString(strData: string); function SaveStructureToString(): string; virtual; procedure SetBatchUpdate(pBatchUpdate: boolean); {$IFDEF Release} 
inline; {$ENDIF} procedure UpdateWeights(); {$IFDEF Release} inline; {$ENDIF} @@ -338,9 +340,9 @@ TNNetInputBase = class(TNNetLayer) // need to backpropagate errors up to the input, call EnableErrorCollection. TNNetInput = class(TNNetInputBase) public - constructor Create(pSize: integer); overload; - constructor Create(pSizeX, pSizeY, pDepth: integer); overload; - constructor Create(pSizeX, pSizeY, pDepth, pError: integer); overload; + constructor Create(pSize: integer); reintroduce; overload; + constructor Create(pSizeX, pSizeY, pDepth: integer); reintroduce; overload; + constructor Create(pSizeX, pSizeY, pDepth, pError: integer); reintroduce; overload; function EnableErrorCollection: TNNetInput; function DisableErrorCollection: TNNetInput; @@ -359,7 +361,7 @@ TNNetIdentity = class(TNNetLayer) /// This layer allows you to debug activation and backpropagation of an TNNetDebug = class(TNNetIdentity) public - constructor Create(hasForward, hasBackward: integer); overload; + constructor Create(hasForward, hasBackward: integer); reintroduce; overload; procedure Compute(); override; procedure Backpropagate(); override; end; @@ -372,7 +374,7 @@ TNNetPad = class(TNNetLayer) FPadding: integer; procedure SetPrevLayer(pPrevLayer: TNNetLayer); override; public - constructor Create(Padding: integer); overload; + constructor Create(Padding: integer); reintroduce; overload; procedure Compute(); override; procedure Backpropagate(); override; end; @@ -407,7 +409,7 @@ TNNetDigital = class(TNNetIdentity) FLowValue, FHighValue: TNeuralFloat; FMiddleDist: TNeuralFloat; public - constructor Create(LowValue, HighValue: integer); overload; + constructor Create(LowValue, HighValue: integer); reintroduce; overload; procedure Compute(); override; procedure Backpropagate(); override; end; @@ -425,7 +427,7 @@ TNNetReLUL = class(TNNetReLUBase) private FScale, FLowLimit, FHighLimit: TNeuralFloat; public - constructor Create(LowLimit, HighLimit, Leakiness: integer); overload; + constructor Create(LowLimit, HighLimit, Leakiness: integer); reintroduce; overload; procedure Compute(); override; end; @@ -474,7 +476,7 @@ TNNetPower = class(TNNetReLUBase) private FPower: TNeuralFloat; public - constructor Create(iPower: integer); overload; + constructor Create(iPower: integer); reintroduce; overload; procedure Compute(); override; end; @@ -516,7 +518,7 @@ TNNetHyperbolicTangent = class(TNNetSigmoid) // learning but can also provoke overflows. TNNetMulLearning = class(TNNetIdentity) public - constructor Create(pMul: integer); overload; + constructor Create(pMul: integer); reintroduce; overload; procedure Backpropagate(); override; end; @@ -536,7 +538,7 @@ TNNetNegate = class(TNNetMulByConstant) /// This is an experimental layer. Do not use it. 
TNNetAddAndDiv = class(TNNetIdentity) public - constructor Create(pAdd, pDiv: integer); overload; + constructor Create(pAdd, pDiv: integer); reintroduce; overload; procedure Compute(); override; end; @@ -556,7 +558,7 @@ TNNetDropout = class(TNNetAddNoiseBase) private procedure SetPrevLayer(pPrevLayer: TNNetLayer); override; public - constructor Create(Rate: double; OneMaskPerbatch: integer = 1); overload; + constructor Create(Rate: double; OneMaskPerbatch: integer = 1); reintroduce; overload; destructor Destroy(); override; procedure Compute(); override; procedure Backpropagate(); override; @@ -573,7 +575,7 @@ TNNetRandomMulAdd = class(TNNetAddNoiseBase) protected FRandomBias, FRandomMul: TNeuralFloat; public - constructor Create(AddRate, MulRate: integer); overload; + constructor Create(AddRate, MulRate: integer); reintroduce; overload; procedure Compute(); override; procedure Backpropagate(); override; end; @@ -584,7 +586,7 @@ TNNetChannelRandomMulAdd = class(TNNetAddNoiseBase) protected FRandomBias, FRandomMul: TNNetVolume; public - constructor Create(AddRate, MulRate: integer); overload; + constructor Create(AddRate, MulRate: integer); reintroduce; overload; destructor Destroy; override; procedure SetPrevLayer(pPrevLayer: TNNetLayer); override; procedure Compute(); override; @@ -668,8 +670,8 @@ TNNetChannelMulByLayer = class(TNNetChannelTransformBase) FLayerWithChannels, FLayerMul: TNNetLayer; procedure SetPrevLayer(pPrevLayer: TNNetLayer); override; public - constructor Create(LayerWithChannels, LayerMul: TNNetLayer); overload; - constructor Create(LayerWithChannelsIdx, LayerMulIdx: integer); overload; + constructor Create(LayerWithChannels, LayerMul: TNNetLayer); reintroduce; overload; + constructor Create(LayerWithChannelsIdx, LayerMulIdx: integer); reintroduce; overload; procedure Compute(); override; procedure Backpropagate(); override; end; @@ -682,8 +684,8 @@ TNNetCellMulByCell = class(TNNetChannelTransformBase) FLayerA, FLayerB: TNNetLayer; procedure SetPrevLayer(pPrevLayer: TNNetLayer); override; public - constructor Create(LayerA, LayerB: TNNetLayer); overload; - constructor Create(LayerAIdx, LayerBIdx: integer); overload; + constructor Create(LayerA, LayerB: TNNetLayer); reintroduce; overload; + constructor Create(LayerAIdx, LayerBIdx: integer); reintroduce; overload; procedure Compute(); override; procedure Backpropagate(); override; end; @@ -742,7 +744,7 @@ TNNetLocalResponseNorm2D = class(TNNetIdentity) private FLRN: TNNetVolume; public - constructor Create(pSize: integer); overload; + constructor Create(pSize: integer); reintroduce; overload; destructor Destroy(); override; procedure Compute(); override; @@ -755,7 +757,7 @@ TNNetInterleaveChannels = class(TNNetIdentity) ToChannels: TNeuralIntegerArray; procedure SetPrevLayer(pPrevLayer: TNNetLayer); override; public - constructor Create(StepSize: integer); overload; + constructor Create(StepSize: integer); reintroduce; overload; destructor Destroy(); override; procedure Compute(); override; @@ -774,7 +776,7 @@ TNNetReshape = class(TNNetLayer) private procedure SetPrevLayer(pPrevLayer: TNNetLayer); override; public - constructor Create(pSizeX, pSizeY, pDepth: integer); overload; + constructor Create(pSizeX, pSizeY, pDepth: integer); reintroduce; overload; procedure Compute(); override; procedure Backpropagate(); override; @@ -800,8 +802,8 @@ TNNetConcatBase = class(TNNetLayer) // XY size. 
TNNetConcat = class(TNNetConcatBase) public - constructor Create(pSizeX, pSizeY, pDepth: integer; aL: array of TNNetLayer); overload; - constructor Create(aL: array of TNNetLayer); overload; + constructor Create(pSizeX, pSizeY, pDepth: integer; aL: array of TNNetLayer); reintroduce; overload; + constructor Create(aL: array of TNNetLayer); reintroduce; overload; procedure Compute(); override; procedure Backpropagate(); override; @@ -815,7 +817,7 @@ TNNetDeepConcat = class(TNNetConcatBase) FDeepsChannel: TNeuralIntegerArray; FRemainingChannels: TNeuralIntegerArray; public - constructor Create(aL: array of TNNetLayer); overload; + constructor Create(aL: array of TNNetLayer); reintroduce; overload; destructor Destroy(); override; procedure Compute(); override; @@ -825,7 +827,7 @@ TNNetDeepConcat = class(TNNetConcatBase) /// This layer sums layers of same size allowing resnet style layers. TNNetSum = class(TNNetConcatBase) public - constructor Create(aL: array of TNNetLayer); overload; + constructor Create(aL: array of TNNetLayer); reintroduce; overload; destructor Destroy(); override; procedure Compute(); override; @@ -838,8 +840,8 @@ TNNetSplitChannels = class(TNNetLayer) FChannels: TNeuralIntegerArray; procedure SetPrevLayer(pPrevLayer: TNNetLayer); override; public - constructor Create(ChannelStart, ChannelLen: integer); overload; - constructor Create(pChannels: array of integer); overload; + constructor Create(ChannelStart, ChannelLen: integer); reintroduce; overload; + constructor Create(pChannels: array of integer); reintroduce; overload; destructor Destroy(); override; procedure Compute(); override; @@ -863,8 +865,8 @@ TNNetFullConnect = class(TNNetLayerConcatedWeights) procedure ComputePreviousLayerError(); override; procedure ComputePreviousLayerErrorCPU(); virtual; public - constructor Create(pSizeX, pSizeY, pDepth: integer; pSuppressBias: integer = 0); overload; virtual; - constructor Create(pSize:integer; pSuppressBias: integer = 0); overload; + constructor Create(pSizeX, pSizeY, pDepth: integer; pSuppressBias: integer = 0); reintroduce; overload; virtual; + constructor Create(pSize:integer; pSuppressBias: integer = 0); reintroduce; overload; procedure Compute(); override; procedure ComputeCPU(); virtual; procedure Backpropagate(); override; @@ -953,14 +955,14 @@ TNNetConvolutionAbstract = class(TNNetLayerConcatedWeights) procedure RefreshCalculatePrevLayerError(); procedure SetPrevLayer(pPrevLayer: TNNetLayer); override; public - constructor Create(pFeatureSize, pInputPadding, pStride: integer; pSuppressBias: integer = 0); overload; + constructor Create(pFeatureSize, pInputPadding, pStride: integer; pSuppressBias: integer = 0); reintroduce; overload; destructor Destroy(); override; procedure InitDefault(); override; end; /// This class does a depthwise convolution. TNNetDepthwiseConv = class(TNNetConvolutionAbstract) - private + protected procedure SetPrevLayer(pPrevLayer: TNNetLayer); override; procedure BackpropagateCPU(); {$IFDEF Release} inline; {$ENDIF} procedure BackpropagateCPUFast(); @@ -987,6 +989,12 @@ TNNetDepthwiseConvReLU = class(TNNetDepthwiseConv) constructor Create(pMultiplier, pFeatureSize, pInputPadding, pStride: integer); override; end; + TNNetDepthwiseConvGeLU = class(TNNetDepthwiseConv) + public + constructor Create(pMultiplier, pFeatureSize, pInputPadding, pStride: integer); override; + end; + + /// This is a base class. Do not use it directly. 
TNNetConvolutionBase = class(TNNetConvolutionAbstract) private @@ -998,6 +1006,7 @@ TNNetConvolutionBase = class(TNNetConvolutionAbstract) FMaxTileX, FMaxTileD: integer; FTileSizeX, FTileSizeD: integer; + protected {$IFDEF Debug} procedure PrepareInputForConvolution(); overload; {$IFDEF Release} inline; {$ENDIF} procedure PrepareInputForConvolution(OutputX, OutputY: integer); overload; {$IFDEF Release} inline; {$ENDIF} @@ -1026,6 +1035,7 @@ TNNetGroupedConvolutionLinear = class(TNNetConvolutionBase) procedure PrepareInputForGroupedConvolutionFast(); procedure ComputeCPU(); procedure BackpropagateCPU(); + protected procedure SetPrevLayer(pPrevLayer: TNNetLayer); override; public constructor Create(pNumFeatures, pFeatureSize, pInputPadding, pStride, pGroups: integer; pSuppressBias: integer = 0); overload; virtual; @@ -1059,7 +1069,6 @@ TNNetGroupedPointwiseConvReLU = class(TNNetGroupedPointwiseConvLinear) TNNetConvolution = class(TNNetConvolutionBase) protected procedure BackpropagateAtOutputPos(pCanBackpropOnPos: boolean; OutputRawPos, OutputX, OutputY, OutputD, PrevX, PrevY: integer); {$IFDEF Release} inline; {$ENDIF} - private procedure ComputeCPU(); procedure ComputeTiledCPU(); procedure ComputeInterleaved(); @@ -1068,6 +1077,7 @@ TNNetConvolution = class(TNNetConvolutionBase) procedure BackpropagateFastTiledCPU(); procedure BackpropagateFastCPUDev(); // Backprop CPU development version (do not use it) + protected {$IFDEF OpenCL} procedure ComputeOpenCL(); {$ENDIF} @@ -1108,6 +1118,21 @@ TNNetConvolutionReLU = class(TNNetConvolution) constructor Create(pNumFeatures, pFeatureSize, pInputPadding, pStride: integer; pSuppressBias: integer = 0); override; end; + // convolutional layer with GeLU activation function + TNNetConvolutionGeLU = class(TNNetConvolution) + public + constructor Create(pNumFeatures, pFeatureSize, pInputPadding, pStride: integer; pSuppressBias: integer = 0); override; + end; + + TNNetConvolutionSwish6 = class(TNNetConvolution) + public + constructor Create(pNumFeatures, pFeatureSize, pInputPadding, pStride: integer; pSuppressBias: integer = 0); override; + end; + TNNetConvolutionSwish = class(TNNetConvolution) + public + constructor Create(pNumFeatures, pFeatureSize, pInputPadding, pStride: integer; pSuppressBias: integer = 0); override; + end; + /// Pointwise convolution with tanh activation. TNNetPointwiseConv = class(TNNetConvolution) public @@ -1140,7 +1165,7 @@ TNNetDeconvolutionReLU = class(TNNetConvolutionReLU) { TNNetLocalConnect } TNNetLocalConnect = class(TNNetConvolutionBase) - private + protected procedure BackpropagateAtOutputPos(OutputX, OutputY, OutputD: integer); {$IFDEF Release} inline; {$ENDIF} procedure SetPrevLayer(pPrevLayer: TNNetLayer); override; public @@ -1153,7 +1178,7 @@ TNNetLocalConnect = class(TNNetConvolutionBase) { TNNetLocalProduct } // This is an experimental layer. Do not use it yet. 
TNNetLocalProduct = class(TNNetConvolutionBase) - private + protected procedure BackpropagateAtOutputPos(OutputX, OutputY, OutputD: integer); {$IFDEF Release} inline; {$ENDIF} procedure SetPrevLayer(pPrevLayer: TNNetLayer); override; public @@ -1196,7 +1221,7 @@ TNNetPoolBase = class(TNNetLayer) procedure BackpropagateWithStride(); procedure ComputePreviousLayerError(); override; public - constructor Create(pPoolSize: integer; pStride:integer = 0; pPadding: integer = 0); overload; + constructor Create(pPoolSize: integer; pStride:integer = 0; pPadding: integer = 0); reintroduce; overload; virtual; destructor Destroy(); override; procedure Backpropagate(); override; end; @@ -1287,6 +1312,7 @@ TNNetUpsample = class(TNNetDeMaxPool) TNNetDeAvgPool = class(TNNetDeMaxPool); /// neural network + TTNNetProgress = procedure(Sender : TObject; numCalc, numTotal : integer; var cancel : boolean ) of Object; TNNet = class(TMObject) protected FLayers: TNNetLayerList; @@ -1298,8 +1324,11 @@ TNNet = class(TMObject) {$IFDEF OpenCL} FDotProductKernel: TDotProductKernel; {$ENDIF} + fProgress : TTNNetProgress; public - constructor Create(); override; + property OnProgress : TTNNetProgress read fProgress write fProgress; // called in Compute! + public + constructor Create(); destructor Destroy(); override; function CreateLayer(strData: string): TNNetLayer; @@ -1481,7 +1510,6 @@ TNNet = class(TMObject) // custom layers support function ShouldIncDepartingBranchesCnt(pLayer: TNNetLayer):boolean; virtual; - published property BackwardTime: double read FBackwardTime write FBackwardTime; property ForwardTime: double read FForwardTime write FForwardTime; property Layers: TNNetLayerList read FLayers; @@ -1622,7 +1650,7 @@ TNNetByteProcessing = class(TNNetIdentity) FActionBytes: array of byte; procedure SetPrevLayer(pPrevLayer: TNNetLayer); override; public - constructor Create(CacheSize, TestCount, OperationCount: integer); overload; + constructor Create(CacheSize, TestCount, OperationCount: integer); reintroduce; overload; destructor Destroy; override; procedure Compute(); override; procedure Backpropagate(); override; @@ -1633,7 +1661,7 @@ TNNetForByteProcessing = class(TNNet) private FInput, FOutput: TNNetVolume; public - constructor Create(); override; + constructor Create(); destructor Destroy(); override; procedure AddBasicByteProcessingLayers(InputByteCount, OutputByteCount: integer; @@ -2098,7 +2126,7 @@ procedure TNNetGroupedConvolutionLinear.PrepareInputForGroupedConvolutionFast(); ChannelsPerGroup, ChannelsPerGroupSize: integer; yCount, xCount, groupCount: integer; InputX, InputY: integer; - RowSize: integer; + //RowSize: integer; FeatureSizeXYD: integer; {$IFDEF AVXANY} SourceRawPos, DestRawPos: pointer; @@ -2111,7 +2139,7 @@ procedure TNNetGroupedConvolutionLinear.PrepareInputForGroupedConvolutionFast(); else begin ChannelsPerGroup := FInputCopy.Depth div FStruct[5]; - RowSize := ChannelsPerGroup; + //RowSize := ChannelsPerGroup; ChannelsPerGroupSize := ChannelsPerGroup * SizeOf(TNeuralFloat); MaxX := FOutput.SizeX - 1; MaxY := FOutput.SizeY - 1; @@ -2178,7 +2206,7 @@ procedure TNNetGroupedConvolutionLinear.BackpropagateCPU(); var OutputX, OutputY, OutputD: integer; MaxX, MaxY, MaxD: integer; - GroupId, GroupDSize, GroupDStart: integer; + (*GroupId, *)GroupDSize, GroupDStart: integer; PrevX, PrevY: integer; OutputRawPos: integer; CanBackpropOnPos: boolean; @@ -2192,7 +2220,9 @@ procedure TNNetGroupedConvolutionLinear.BackpropagateCPU(); //PrevNumElements: integer; MissedElements: integer; //, 
PrevMissedElements: integer; + {$IFDEF AVX64} PtrNeuronDelta: TNeuralFloatArrPtr; + {$ENDIF} PtrPreparedInput: TNeuralFloatArrPtr; //PrevPtrA, PrevPtrB: TNeuralFloatArrPtr; NeuronWeights: integer; @@ -2205,6 +2235,7 @@ procedure TNNetGroupedConvolutionLinear.BackpropagateCPU(); MaxX := OutputError.SizeX - 1; MaxY := OutputError.SizeY - 1; MaxD := OutputError.Depth - 1; + LocalDestPtr := nil; // Debug code: FOutputError.ForceMaxAbs(1); GroupDSize := OutputError.Depth div FStruct[5]; LocalPrevError := FPrevLayer.OutputError; @@ -2238,7 +2269,7 @@ procedure TNNetGroupedConvolutionLinear.BackpropagateCPU(); OutputRawPos := FOutputErrorDeriv.GetRawPos(OutputX, OutputY, StartTileD); for OutputD := StartTileD to EndTileD do begin - GroupId := FArrGroupId[OutputD]; + //GroupId := FArrGroupId[OutputD]; GroupDStart := FArrGroupIdStart[OutputD]; if (FCalculatePrevLayerError and CanBackpropOnPos) then LocalDestPtr := LocalPrevError.GetRawPtr(PrevX, PrevY, GroupDStart); @@ -2976,25 +3007,23 @@ procedure TNNetLocalProduct.Compute(); procedure TNNetLocalProduct.ComputeCPU(); var OutputCntX, OutputCntY, OutputCntD: integer; - InputCntX, InputCntY: integer; + //InputCntX, InputCntY: integer; MaxX, MaxY, MaxD: integer; LocalSize: integer; PtrA: TNeuralFloatArrPtr; OutputIdx: integer; Product: TNeuralFloat; - CntXYD: integer; begin MaxX := FOutput.SizeX - 1; MaxY := FOutput.SizeY - 1; MaxD := FOutput.Depth - 1; LocalSize := FFeatureSizeX*FFeatureSizeY*FInputCopy.Depth; - InputCntX := 0; + // InputCntX := 0; OutputCntX := 0; - CntXYD := 0; while OutputCntX <= MaxX do begin - InputCntY := 0; + //InputCntY := 0; OutputCntY := 0; while OutputCntY <= MaxY do begin @@ -3009,12 +3038,11 @@ procedure TNNetLocalProduct.ComputeCPU(); FOutputRaw.FData[OutputIdx] := Product; FOutput.FData[OutputIdx] := Product; Inc(OutputCntD); - Inc(CntXYD); end; - Inc(InputCntY, FStride); + // Inc(InputCntY, FStride); Inc(OutputCntY); end; - Inc(InputCntX, FStride); + // Inc(InputCntX, FStride); Inc(OutputCntX); end; (* @@ -3616,6 +3644,15 @@ constructor TNNetDepthwiseConvReLU.Create(pMultiplier, pFeatureSize, FActivationFnDerivative := @RectifiedLinearUnitDerivative; end; +{ TNNetDepthwiseConvGeLU } +constructor TNNetDepthwiseConvGeLU.Create(pMultiplier, pFeatureSize, + pInputPadding, pStride: integer); +begin + inherited Create(pMultiplier, pFeatureSize, pInputPadding, pStride); + FActivationFn := @GaussErrorLinUnit; + FActivationFnDerivative := @GaussErrorLinUnitDerivative; +end; + { TNNetDepthwiseConvLinear } constructor TNNetDepthwiseConvLinear.Create(pMultiplier, pFeatureSize, pInputPadding, pStride: integer); @@ -4893,7 +4930,13 @@ procedure TNNetLayerConcatedWeights.RefreshNeuronWeightList(); procedure TNNetConvolutionBase.EnableOpenCL(DotProductKernel: TDotProductKernel); begin inherited EnableOpenCL(DotProductKernel); - FDotCL.PrepareForCompute(FConcatedWInter, FInputPrepared, FVectorSize); + + // fDotCL is not assigned in case fShouldOpenCL is false + if Assigned(FDotCL) + then + FDotCL.PrepareForCompute(FConcatedWInter, FInputPrepared, FVectorSize) + else + FHasOpenCL := False; end; procedure TNNetLayerConcatedWeights.EnableOpenCL( @@ -6492,7 +6535,6 @@ procedure TestConvolutionAPI(); NN: THistoricalNets; NN2: TNNet; AuxVolume: TNNetVolume; - I: integer; begin NN := THistoricalNets.Create(); AuxVolume := TNNetVolume.Create; @@ -7334,7 +7376,7 @@ procedure TNNetDeMaxPool.ComputePreviousLayerError(); RawPos, PrevRawPos: integer; PrevPosX, PrevPosY: integer; floatPoolSize: TNeuralFloat; - OutX, OutY: integer; +// OutX, OutY: 
integer; begin MaxD := Output.Depth - 1; @@ -7525,13 +7567,13 @@ procedure TNNetReshape.Compute; procedure TNNetReshape.Backpropagate; var - Len: integer; +// Len: integer; StartTime: double; begin StartTime := Now(); Inc(FBackPropCallCurrentCnt); if FBackPropCallCurrentCnt < FDepartingBranchesCnt then exit; - Len := Min(FOutput.Size, FPrevLayer.FOutput.Size); +// Len := Min(FOutput.Size, FPrevLayer.FOutput.Size); //TODO: check this for possible crash. FPrevLayer.FOutputError.Add(FOutputError); FBackwardTime := FBackwardTime + (Now() - StartTime); @@ -7683,26 +7725,21 @@ procedure TNNetLocalConnect.Compute(); procedure TNNetLocalConnect.ComputeCPU(); var OutputCntX, OutputCntY, OutputCntD: integer; - InputCntX, InputCntY: integer; MaxX, MaxY, MaxD: integer; LocalSize: integer; LocalW: TNNetVolume; PtrA, PtrB: TNeuralFloatArrPtr; NeuronIdx: integer; Sum: TNeuralFloat; - CntXYD: integer; begin MaxX := FOutput.SizeX - 1; MaxY := FOutput.SizeY - 1; MaxD := FOutput.Depth - 1; LocalSize := FFeatureSizeX*FFeatureSizeY*FInputCopy.Depth; - InputCntX := 0; OutputCntX := 0; - CntXYD := 0; while OutputCntX <= MaxX do begin - InputCntY := 0; OutputCntY := 0; while OutputCntY <= MaxY do begin @@ -7720,12 +7757,9 @@ procedure TNNetLocalConnect.ComputeCPU(); FOutputRaw.FData[NeuronIdx] := Sum; FOutput.FData[NeuronIdx] := FActivationFn(Sum); Inc(OutputCntD); - Inc(CntXYD); end; - Inc(InputCntY, FStride); Inc(OutputCntY); end; - Inc(InputCntX, FStride); Inc(OutputCntX); end; end; @@ -7904,6 +7938,34 @@ constructor TNNetConvolutionReLU.Create(pNumFeatures, pFeatureSize, FActivationFnDerivative := @RectifiedLinearUnitDerivative; end; +{ TNNetConvolutionGeLU } + +constructor TNNetConvolutionGeLU.Create(pNumFeatures, pFeatureSize, + pInputPadding, pStride: integer; pSuppressBias: integer = 0); +begin + inherited Create(pNumFeatures, pFeatureSize, pInputPadding, pStride, pSuppressBias); + FActivationFn := @GaussErrorLinUnit; + FActivationFnDerivative := @GaussErrorLinUnitDerivative; +end; + +{ TNNetConvolutionSwish6 } + +constructor TNNetConvolutionSwish6.Create(pNumFeatures, pFeatureSize, + pInputPadding, pStride, pSuppressBias: integer); +begin + inherited Create(pNumFeatures, pFeatureSize, pInputPadding, pStride, pSuppressBias); + FActivationFn := @Swish6Unit; + FActivationFnDerivative := @Swish6Derivative; +end; + +constructor TNNetConvolutionSwish.Create(pNumFeatures, pFeatureSize, + pInputPadding, pStride, pSuppressBias: integer); +begin + inherited Create(pNumFeatures, pFeatureSize, pInputPadding, pStride, pSuppressBias); + FActivationFn := @SwishUnit; + FActivationFnDerivative := @SwishDerivative; +end; + { TNNetPoolBase } procedure TNNetPoolBase.SetPrevLayer(pPrevLayer: TNNetLayer); var @@ -8424,7 +8486,7 @@ procedure TNNetConvolutionBase.PrepareInputForConvolutionFast(); DepthFSize, SizeOfDepthFSize: integer; yCount: integer; InputX: integer; - RowSize: integer; + //RowSize: integer; {$IFDEF AVXANY} SourceRawPos, DestRawPos: pointer; {$ENDIF} @@ -8436,7 +8498,7 @@ procedure TNNetConvolutionBase.PrepareInputForConvolutionFast(); else begin DepthFSize := FInputCopy.Depth * FFeatureSizeX; - RowSize := DepthFSize; + //RowSize := DepthFSize; SizeOfDepthFSize := DepthFSize * SizeOf(TNeuralFloat); MaxX := FOutput.SizeX - 1; MaxY := FOutput.SizeY - 1; @@ -8651,28 +8713,30 @@ procedure TNNetConvolution.BackpropagateFastCPU(); SmoothLocalOutputErrorDeriv: TNeuralFloat; LocalWeight, LocalPrevError: TNNetVolume; {SrcPtr,} LocalDestPtr: TNeuralFloatArrPtr; - SmoothLocalOutputErrorDerivPtr: pointer; - 
PrevNumElements, PrevMissedElements: integer; - PtrNeuronDelta, PtrPreparedInput: TNeuralFloatArrPtr; +// SmoothLocalOutputErrorDerivPtr: pointer; +// PrevNumElements, PrevMissedElements: integer; + {$IFDEF AVX64}PtrNeuronDelta, {$ENDIF} PtrPreparedInput: TNeuralFloatArrPtr; PrevPtrA, PrevPtrB: TNeuralFloatArrPtr; - NeuronWeights: integer; - LocalLearningErrorDerivPtr: pointer; - localNumElements, MissedElements: integer; +// NeuronWeights: integer; +// LocalLearningErrorDerivPtr: pointer; +// localNumElements : Integer; +// MissedElements: integer; MaxPrevX, MaxPrevY: integer; begin MaxX := OutputError.SizeX - 1; MaxY := OutputError.SizeY - 1; MaxD := OutputError.Depth - 1; + LocalDestPtr := nil; MaxPrevX := 1 + FPrevLayer.FOutputError.SizeX - FFeatureSizeX; MaxPrevY := 1 + FPrevLayer.FOutputError.SizeY - FFeatureSizeY; LocalPrevError := FPrevLayer.OutputError; - PrevNumElements := (FSizeXDepth div 4) * 4; - PrevMissedElements := FSizeXDepth - PrevNumElements; - NeuronWeights := FArrNeurons[0].Delta.Size; - localNumElements := (NeuronWeights div 4) * 4; - MissedElements := NeuronWeights - localNumElements; - SmoothLocalOutputErrorDerivPtr := Addr(SmoothLocalOutputErrorDeriv); - LocalLearningErrorDerivPtr := Addr(LocalLearningErrorDeriv); +// PrevNumElements := (FSizeXDepth div 4) * 4; +// PrevMissedElements := FSizeXDepth - PrevNumElements; +// NeuronWeights := FArrNeurons[0].Delta.Size; +// localNumElements := (NeuronWeights div 4) * 4; +// MissedElements := NeuronWeights - localNumElements; +// SmoothLocalOutputErrorDerivPtr := Addr(SmoothLocalOutputErrorDeriv); +// LocalLearningErrorDerivPtr := Addr(LocalLearningErrorDeriv); begin for OutputY := 0 to MaxY do begin @@ -8827,7 +8891,7 @@ procedure TNNetConvolution.BackpropagateFastTiledCPU(); {SrcPtr,} LocalDestPtr: TNeuralFloatArrPtr; SmoothLocalOutputErrorDerivPtr: pointer; PrevNumElements, PrevMissedElements: integer; - PtrNeuronDelta, PtrPreparedInput: TNeuralFloatArrPtr; + {$IFDEF AVX64}PtrNeuronDelta, {$ENDIF} PtrPreparedInput: TNeuralFloatArrPtr; PrevPtrA, PrevPtrB: TNeuralFloatArrPtr; NeuronWeights: integer; LocalLearningErrorDerivPtr: pointer; @@ -8840,6 +8904,7 @@ procedure TNNetConvolution.BackpropagateFastTiledCPU(); MaxX := OutputError.SizeX - 1; MaxY := OutputError.SizeY - 1; MaxD := OutputError.Depth - 1; + LocalDestPtr := nil; MaxPrevX := 1 + FPrevLayer.FOutputError.SizeX - FFeatureSizeX; MaxPrevY := 1 + FPrevLayer.FOutputError.SizeY - FFeatureSizeY; LocalPrevError := FPrevLayer.OutputError; @@ -9014,7 +9079,7 @@ procedure TNNetConvolution.BackpropagateFastCPUDev(); {SrcPtr,} LocalDestPtr: TNeuralFloatArrPtr; SmoothLocalOutputErrorDerivPtr: pointer; PrevNumElements, PrevMissedElements: integer; - PtrNeuronDelta, PtrPreparedInput: TNeuralFloatArrPtr; + {$IFDEF AVX64}PtrNeuronDelta : TNeuralFloatArrPtr; {$ENDIF} PrevPtrA, PrevPtrB: TNeuralFloatArrPtr; NeuronWeights: integer; LocalLearningErrorDerivPtr: pointer; @@ -9029,6 +9094,7 @@ procedure TNNetConvolution.BackpropagateFastCPUDev(); MaxX := OutputError.SizeX - 1; MaxY := OutputError.SizeY - 1; MaxD := OutputError.Depth - 1; + LocalDestPtr := nil; MaxPrevX := 1 + FPrevLayer.FOutputError.SizeX - FFeatureSizeX; MaxPrevY := 1 + FPrevLayer.FOutputError.SizeY - FFeatureSizeY; LocalPrevError := FPrevLayer.OutputError; @@ -9048,7 +9114,7 @@ procedure TNNetConvolution.BackpropagateFastCPUDev(); PrevX := (OutputX*FStride)-FPadding; OutputRawPos := FOutputErrorDeriv.GetRawPos(OutputX, OutputY); if (FCalculatePrevLayerError) then LocalDestPtr := LocalPrevError.GetRawPtr(OutputX, 
OutputY); - PtrPreparedInput := FInputPrepared.GetRawPtr(OutputX, OutputY); + //PtrPreparedInput := FInputPrepared.GetRawPtr(OutputX, OutputY); CanBackpropOnPos := (PrevX >= 0) and (PrevY >= 0) and (PrevX < MaxPrevX) and @@ -9213,8 +9279,8 @@ destructor TNNetConvolutionAbstract.Destroy(); procedure TNNetConvolutionAbstract.InitDefault(); {$IFDEF Debug} -var - MaxAbsW: TNeuralFloat; +//var +// MaxAbsW: TNeuralFloat; {$ENDIF} begin // Although Keras works better with Glorot, CAI seems to work better with He. @@ -9520,7 +9586,8 @@ procedure TNNetFullConnect.EnableOpenCL(DotProductKernel: TDotProductKernel); begin RefreshNeuronWeightList(); AfterWeightUpdate(); - FDotCL.PrepareForCompute(FConcatedWInter, FPrevLayer.FOutput, FVectorSize); + if FDotCL.PrepareForCompute(FConcatedWInter, FPrevLayer.FOutput, FVectorSize) <> CL_SUCCESS then + FreeAndNil(fDotCL); end; end; {$ENDIF} @@ -9826,185 +9893,97 @@ function TNNet.CreateLayer(strData: string): TNNetLayer; end; end; - {$IFDEF FPC} - case S[0] of - 'TNNetInput' : Result := TNNetInput.Create(St[0], St[1], St[2], St[3]); - 'TNNetIdentity' : Result := TNNetIdentity.Create(); - 'TNNetDebug' : Result := TNNetDebug.Create(St[0], St[1]); - 'TNNetPad' : Result := TNNetPad.Create(St[0]); - 'TNNetIdentityWithoutBackprop': Result := TNNetIdentityWithoutBackprop.Create(); - 'TNNetReLU' : Result := TNNetReLU.Create(); - 'TNNetSwish' : Result := TNNetSwish.Create(); - 'TNNetSwish6' : Result := TNNetSwish6.Create(); - 'TNNetReLUSqrt': Result := TNNetReLUSqrt.Create(); - 'TNNetReLUL' : Result := TNNetReLUL.Create(St[0], St[1], St[2]); - 'TNNetReLU6' : Result := TNNetReLU6.Create(St[2]); - 'TNNetPower' : Result := TNNetPower.Create(St[0]); - 'TNNetSELU' : Result := TNNetSELU.Create(); - 'TNNetLeakyReLU' : Result := TNNetLeakyReLU.Create(); - 'TNNetVeryLeakyReLU' : Result := TNNetVeryLeakyReLU.Create(); - 'TNNetSigmoid' : Result := TNNetSigmoid.Create(); - 'TNNetHyperbolicTangent' : Result := TNNetHyperbolicTangent.Create(); - 'TNNetDropout' : Result := TNNetDropout.Create(1/St[0], St[1]); - 'TNNetReshape' : Result := TNNetReshape.Create(St[0], St[1], St[2]); - 'TNNetLayerFullConnect' : Result := TNNetFullConnect.Create(St[0], St[1], St[2], St[3]); - 'TNNetFullConnect' : Result := TNNetFullConnect.Create(St[0], St[1], St[2], St[3]); - 'TNNetFullConnectSigmoid': Result := TNNetFullConnectSigmoid.Create(St[0], St[1], St[2], St[3]); - 'TNNetFullConnectDiff' : Result := TNNetFullConnectDiff.Create(St[0], St[1], St[2], St[3]); - 'TNNetLayerFullConnectReLU' : Result := TNNetFullConnectReLU.Create(St[0], St[1], St[2], St[3]); - 'TNNetFullConnectReLU' : Result := TNNetFullConnectReLU.Create(St[0], St[1], St[2], St[3]); - 'TNNetFullConnectLinear' : Result := TNNetFullConnectLinear.Create(St[0], St[1], St[2], St[3]); - 'TNNetLocalConnect' : Result := TNNetLocalConnect.Create(St[0], St[1], St[2], St[3], St[4]); - 'TNNetLocalProduct' : Result := TNNetLocalProduct.Create(St[0], St[1], St[2], St[3], St[4]); - 'TNNetLocalConnectReLU' : Result := TNNetLocalConnectReLU.Create(St[0], St[1], St[2], St[3], St[4]); - 'TNNetMulLearning' : Result := TNNetMulLearning.Create(St[0]); - 'TNNetMulByConstant' : Result := TNNetMulByConstant.Create(St[0]); - 'TNNetNegate' : Result := TNNetNegate.Create(); - 'TNNetLayerSoftMax' : Result := TNNetSoftMax.Create(); - 'TNNetSoftMax' : Result := TNNetSoftMax.Create(); - 'TNNetConvolution' : Result := TNNetConvolution.Create(St[0], St[1], St[2], St[3], St[4]); - 'TNNetConvolutionReLU' : Result := TNNetConvolutionReLU.Create(St[0], St[1], St[2], 
St[3], St[4]); - 'TNNetConvolutionLinear' : Result := TNNetConvolutionLinear.Create(St[0], St[1], St[2], St[3], St[4]); - 'TNNetGroupedConvolutionLinear' : Result := TNNetGroupedConvolutionLinear.Create(St[0], St[1], St[2], St[3], St[5], St[4]); - 'TNNetGroupedConvolutionReLU' : Result := TNNetGroupedConvolutionReLU.Create(St[0], St[1], St[2], St[3], St[5], St[4]); - 'TNNetGroupedPointwiseConvLinear' : Result := TNNetGroupedPointwiseConvLinear.Create({pNumFeatures=}St[0], {pGroups=}St[5], {pSuppressBias=}St[4]); - 'TNNetGroupedPointwiseConvReLU' : Result := TNNetGroupedPointwiseConvReLU.Create({pNumFeatures=}St[0], {pGroups=}St[5], {pSuppressBias=}St[4]); - 'TNNetConvolutionSharedWeights' : Result := TNNetConvolutionSharedWeights.Create(FLayers[St[5]]); - 'TNNetDepthwiseConv' : Result := TNNetDepthwiseConv.Create(St[0], St[1], St[2], St[3]); - 'TNNetDepthwiseConvReLU' : Result := TNNetDepthwiseConvReLU.Create(St[0], St[1], St[2], St[3]); - 'TNNetDepthwiseConvLinear' : Result := TNNetDepthwiseConvLinear.Create(St[0], St[1], St[2], St[3]); - 'TNNetPointwiseConv' : Result := TNNetPointwiseConv.Create(St[0], St[4]); - 'TNNetPointwiseConvReLU' : Result := TNNetPointwiseConvReLU.Create(St[0], St[4]); - 'TNNetPointwiseConvLinear' : Result := TNNetPointwiseConvLinear.Create(St[0], St[4]); - 'TNNetMaxPool' : Result := TNNetMaxPool.Create(St[0], St[1], St[2]); - 'TNNetMaxPoolPortable' : Result := TNNetMaxPoolPortable.Create(St[0], St[1], St[2]); - 'TNNetMinPool' : Result := TNNetMinPool.Create(St[0], St[1], St[2]); - 'TNNetAvgPool' : Result := TNNetAvgPool.Create(St[0]); - 'TNNetAvgChannel': Result := TNNetAvgChannel.Create(); - 'TNNetMaxChannel': Result := TNNetMaxChannel.Create(); - 'TNNetMinChannel': Result := TNNetMinChannel.Create(); - 'TNNetConcat' : Result := TNNetConcat.Create(aL); - 'TNNetDeepConcat' : Result := TNNetDeepConcat.Create(aL); - 'TNNetInterleaveChannels' : Result := TNNetInterleaveChannels.Create(St[0]); - 'TNNetSum' : Result := TNNetSum.Create(aL); - 'TNNetSplitChannels' : Result := TNNetSplitChannels.Create(aIdx); - 'TNNetSplitChannelEvery' : Result := TNNetSplitChannelEvery.Create(aIdx); - 'TNNetDeLocalConnect' : Result := TNNetDeLocalConnect.Create(St[0], St[1], St[4]); - 'TNNetDeLocalConnectReLU' : Result := TNNetDeLocalConnectReLU.Create(St[0], St[1], St[4]); - 'TNNetDeconvolution' : Result := TNNetDeconvolution.Create(St[0], St[1], St[4]); - 'TNNetDeconvolutionReLU' : Result := TNNetDeconvolutionReLU.Create(St[0], St[1], St[4]); - 'TNNetDeMaxPool' : Result := TNNetDeMaxPool.Create(St[0], St[7]); - 'TNNetDeAvgPool' : Result := TNNetDeAvgPool.Create(St[0]); - 'TNNetUpsample' : Result := TNNetUpsample.Create(); - 'TNNetLayerMaxNormalization': Result := TNNetLayerMaxNormalization.Create(); - 'TNNetLayerStdNormalization': Result := TNNetLayerStdNormalization.Create(); - 'TNNetMovingStdNormalization': Result := TNNetMovingStdNormalization.Create(); - 'TNNetChannelStdNormalization': Result := TNNetChannelStdNormalization.Create(); - 'TNNetScaleLearning' : Result := TNNetScaleLearning.Create(); - 'TNNetChannelBias': Result := TNNetChannelBias.Create(); - 'TNNetChannelMul': Result := TNNetChannelMul.Create(); - 'TNNetChannelMulByLayer': Result := TNNetChannelMulByLayer.Create(St[0], St[1]); - 'TNNetCellBias': Result := TNNetCellBias.Create(); - 'TNNetCellMul': Result := TNNetCellMul.Create(); - 'TNNetCellMulByCell': Result := TNNetCellMulByCell.Create(St[0], St[1]); - 'TNNetRandomMulAdd': Result := TNNetRandomMulAdd.Create(St[0], St[1]); - 'TNNetChannelRandomMulAdd': Result := 
TNNetChannelRandomMulAdd.Create(St[0], St[1]); - 'TNNetChannelZeroCenter': Result := TNNetChannelZeroCenter.Create(); - 'TNNetLocalResponseNorm2D': Result := TNNetLocalResponseNorm2D.Create(St[0]); - 'TNNetLocalResponseNormDepth':Result := TNNetLocalResponseNormDepth.Create(St[0]); - 'TNNetAddAndDiv' :Result := TNNetAddAndDiv.Create(St[0], St[1]); - else - raise Exception.create(strData + ' not allowed in CreateLayer.'); - end; - {$ELSE} - if S[0] = 'TNNetInput' then Result := TNNetInput.Create(St[0], St[1], St[2], St[3]) else - if S[0] = 'TNNetIdentity' then Result := TNNetIdentity.Create() else - if S[0] = 'TNNetDebug' then Result := TNNetDebug.Create(St[0], St[1]) else - if S[0] = 'TNNetPad' then Result := TNNetPad.Create(St[0]) else - if S[0] = 'TNNetIdentityWithoutBackprop' then Result := TNNetIdentityWithoutBackprop.Create() else - if S[0] = 'TNNetReLU' then Result := TNNetReLU.Create() else - if S[0] = 'TNNetSwish' then Result := TNNetSwish.Create() else - if S[0] = 'TNNetSwish6' then Result := TNNetSwish6.Create() else - if S[0] = 'TNNetReLUSqrt' then Result := TNNetReLUSqrt.Create() else - if S[0] = 'TNNetReLUL' then Result := TNNetReLUL.Create(St[0], St[1], St[2]) else - if S[0] = 'TNNetReLU6' then Result := TNNetReLU6.Create(St[2]) else - if S[0] = 'TNNetPower' then Result := TNNetPower.Create(St[0]) else - if S[0] = 'TNNetSELU' then Result := TNNetSELU.Create() else - if S[0] = 'TNNetLeakyReLU' then Result := TNNetLeakyReLU.Create() else - if S[0] = 'TNNetVeryLeakyReLU' then Result := TNNetVeryLeakyReLU.Create() else - if S[0] = 'TNNetSigmoid' then Result := TNNetSigmoid.Create() else - if S[0] = 'TNNetHyperbolicTangent' then Result := TNNetHyperbolicTangent.Create() else - if S[0] = 'TNNetDropout' then Result := TNNetDropout.Create(1/St[0], St[1]) else - if S[0] = 'TNNetReshape' then Result := TNNetReshape.Create(St[0], St[1], St[2]) else - if S[0] = 'TNNetLayerFullConnect' then Result := TNNetFullConnect.Create(St[0], St[1], St[2], St[3]) else - if S[0] = 'TNNetFullConnect' then Result := TNNetFullConnect.Create(St[0], St[1], St[2], St[3]) else - if S[0] = 'TNNetFullConnectSigmoid' then Result := TNNetFullConnectSigmoid.Create(St[0], St[1], St[2], St[3]) else - if S[0] = 'TNNetFullConnectDiff' then Result := TNNetFullConnectDiff.Create(St[0], St[1], St[2], St[3]) else - if S[0] = 'TNNetLayerFullConnectReLU' then Result := TNNetFullConnectReLU.Create(St[0], St[1], St[2], St[3]) else - if S[0] = 'TNNetFullConnectReLU' then Result := TNNetFullConnectReLU.Create(St[0], St[1], St[2], St[3]) else - if S[0] = 'TNNetFullConnectLinear' then Result := TNNetFullConnectLinear.Create(St[0], St[1], St[2], St[3]) else - if S[0] = 'TNNetLocalConnect' then Result := TNNetLocalConnect.Create(St[0], St[1], St[2], St[3], St[4]) else - if S[0] = 'TNNetLocalProduct' then Result := TNNetLocalProduct.Create(St[0], St[1], St[2], St[3], St[4]) else - if S[0] = 'TNNetLocalConnectReLU' then Result := TNNetLocalConnectReLU.Create(St[0], St[1], St[2], St[3], St[4]) else - if S[0] = 'TNNetMulLearning' then Result := TNNetMulLearning.Create(St[0]) else - if S[0] = 'TNNetMulByConstant' then Result := TNNetMulByConstant.Create(St[0]) else - if S[0] = 'TNNetNegate' then Result := TNNetNegate.Create() else - if S[0] = 'TNNetLayerSoftMax' then Result := TNNetSoftMax.Create() else - if S[0] = 'TNNetSoftMax' then Result := TNNetSoftMax.Create() else - if S[0] = 'TNNetConvolution' then Result := TNNetConvolution.Create(St[0], St[1], St[2], St[3], St[4]) else - if S[0] = 'TNNetConvolutionReLU' then Result := 
TNNetConvolutionReLU.Create(St[0], St[1], St[2], St[3], St[4]) else - if S[0] = 'TNNetConvolutionLinear' then Result := TNNetConvolutionLinear.Create(St[0], St[1], St[2], St[3], St[4]) else - if S[0] = 'TNNetGroupedConvolutionLinear' then Result := TNNetGroupedConvolutionLinear.Create(St[0], St[1], St[2], St[3], St[5], St[4]) else - if S[0] = 'TNNetGroupedConvolutionReLU' then Result := TNNetGroupedConvolutionReLU.Create(St[0], St[1], St[2], St[3], St[5], St[4]) else - if S[0] = 'TNNetGroupedPointwiseConvLinear' then Result := TNNetGroupedPointwiseConvLinear.Create({pNumFeatures=}St[0], {pGroups=}St[5], {pSuppressBias=}St[4]) else - if S[0] = 'TNNetGroupedPointwiseConvReLU' then Result := TNNetGroupedPointwiseConvReLU.Create({pNumFeatures=}St[0], {pGroups=}St[5], {pSuppressBias=}St[4]) else - if S[0] = 'TNNetConvolutionSharedWeights' then Result := TNNetConvolutionSharedWeights.Create(FLayers[St[5]]) else - if S[0] = 'TNNetDepthwiseConv' then Result := TNNetDepthwiseConv.Create(St[0], St[1], St[2], St[3]) else - if S[0] = 'TNNetDepthwiseConvReLU' then Result := TNNetDepthwiseConvReLU.Create(St[0], St[1], St[2], St[3]) else - if S[0] = 'TNNetDepthwiseConvLinear' then Result := TNNetDepthwiseConvLinear.Create(St[0], St[1], St[2], St[3]) else - if S[0] = 'TNNetPointwiseConv' then Result := TNNetPointwiseConv.Create(St[0], St[4]) else - if S[0] = 'TNNetPointwiseConvReLU' then Result := TNNetPointwiseConvReLU.Create(St[0], St[4]) else - if S[0] = 'TNNetPointwiseConvLinear' then Result := TNNetPointwiseConvLinear.Create(St[0], St[4]) else - if S[0] = 'TNNetMaxPool' then Result := TNNetMaxPool.Create(St[0], St[1], St[2]) else - if S[0] = 'TNNetMaxPoolPortable' then Result := TNNetMaxPoolPortable.Create(St[0], St[1], St[2]) else - if S[0] = 'TNNetMinPool' then Result := TNNetMinPool.Create(St[0], St[1], St[2]) else - if S[0] = 'TNNetAvgPool' then Result := TNNetAvgPool.Create(St[0]) else - if S[0] = 'TNNetAvgChannel' then Result := TNNetAvgChannel.Create() else - if S[0] = 'TNNetMaxChannel' then Result := TNNetMaxChannel.Create() else - if S[0] = 'TNNetMinChannel' then Result := TNNetMinChannel.Create() else - if S[0] = 'TNNetConcat' then Result := TNNetConcat.Create(aL) else - if S[0] = 'TNNetInterleaveChannels' then Result := TNNetInterleaveChannels.Create(St[0]) else - if S[0] = 'TNNetDeepConcat' then Result := TNNetDeepConcat.Create(aL) else - if S[0] = 'TNNetSum' then Result := TNNetSum.Create(aL) else - if S[0] = 'TNNetSplitChannels' then Result := TNNetSplitChannels.Create(aIdx) else - if S[0] = 'TNNetSplitChannelEvery' then Result := TNNetSplitChannelEvery.Create(aIdx) else - if S[0] = 'TNNetDeLocalConnect' then Result := TNNetDeLocalConnect.Create(St[0], St[1], St[4]) else - if S[0] = 'TNNetDeLocalConnectReLU' then Result := TNNetDeLocalConnectReLU.Create(St[0], St[1], St[4]) else - if S[0] = 'TNNetDeconvolution' then Result := TNNetDeconvolution.Create(St[0], St[1], St[4]) else - if S[0] = 'TNNetDeconvolutionReLU' then Result := TNNetDeconvolutionReLU.Create(St[0], St[1], St[4]) else - if S[0] = 'TNNetDeMaxPool' then Result := TNNetDeMaxPool.Create(St[0], St[7]) else - if S[0] = 'TNNetDeAvgPool' then Result := TNNetDeAvgPool.Create(St[0]) else - if S[0] = 'TNNetUpsample' then Result := TNNetUpsample.Create() else - if S[0] = 'TNNetLayerMaxNormalization' then Result := TNNetLayerMaxNormalization.Create() else - if S[0] = 'TNNetLayerStdNormalization' then Result := TNNetLayerStdNormalization.Create() else - if S[0] = 'TNNetMovingStdNormalization' then Result := 
TNNetMovingStdNormalization.Create() else - if S[0] = 'TNNetChannelStdNormalization' then Result := TNNetChannelStdNormalization.Create() else - if S[0] = 'TNNetScaleLearning' then Result := TNNetChannelStdNormalization.Create() else - if S[0] = 'TNNetChannelBias' then Result := TNNetChannelBias.Create() else - if S[0] = 'TNNetChannelMul' then Result := TNNetChannelMul.Create() else - if S[0] = 'TNNetChannelMulByLayer' then Result := TNNetChannelMulByLayer.Create(St[0], St[1]) else - if S[0] = 'TNNetCellBias' then Result := TNNetCellBias.Create() else - if S[0] = 'TNNetCellMul' then Result := TNNetCellMul.Create() else - if S[0] = 'TNNetCellMulByCell' then Result := TNNetCellMulByCell.Create(St[0], St[1]) else - if S[0] = 'TNNetRandomMulAdd' then Result := TNNetRandomMulAdd.Create(St[0], St[1]) else - if S[0] = 'TNNetChannelRandomMulAdd' then Result := TNNetChannelRandomMulAdd.Create(St[0], St[1]) else - if S[0] = 'TNNetChannelZeroCenter' then Result := TNNetChannelZeroCenter.Create() else - if S[0] = 'TNNetLocalResponseNorm2D' then Result := TNNetLocalResponseNorm2D.Create(St[0]) else - if S[0] = 'TNNetLocalResponseNormDepth' then Result := TNNetLocalResponseNormDepth.Create(St[0]) else - if S[0] = 'TNNetAddAndDiv' then Result := TNNetAddAndDiv.Create(St[0], St[1]) else - raise Exception.create(strData + ' not allowed in CreateLayer.'); - {$ENDIF} + if S[0] = 'TNNetInput' then Result := TNNetInput.Create(St[0], St[1], St[2], St[3]) else + if S[0] = 'TNNetIdentity' then Result := TNNetIdentity.Create() else + if S[0] = 'TNNetDebug' then Result := TNNetDebug.Create(St[0], St[1]) else + if S[0] = 'TNNetPad' then Result := TNNetPad.Create(St[0]) else + if S[0] = 'TNNetIdentityWithoutBackprop' then Result := TNNetIdentityWithoutBackprop.Create() else + if S[0] = 'TNNetReLU' then Result := TNNetReLU.Create() else + if S[0] = 'TNNetSwish' then Result := TNNetSwish.Create() else + if S[0] = 'TNNetSwish6' then Result := TNNetSwish6.Create() else + if S[0] = 'TNNetReLUSqrt' then Result := TNNetReLUSqrt.Create() else + if S[0] = 'TNNetReLUL' then Result := TNNetReLUL.Create(St[0], St[1], St[2]) else + if S[0] = 'TNNetReLU6' then Result := TNNetReLU6.Create(St[2]) else + if S[0] = 'TNNetPower' then Result := TNNetPower.Create(St[0]) else + if S[0] = 'TNNetSELU' then Result := TNNetSELU.Create() else + if S[0] = 'TNNetLeakyReLU' then Result := TNNetLeakyReLU.Create() else + if S[0] = 'TNNetVeryLeakyReLU' then Result := TNNetVeryLeakyReLU.Create() else + if S[0] = 'TNNetSigmoid' then Result := TNNetSigmoid.Create() else + if S[0] = 'TNNetHyperbolicTangent' then Result := TNNetHyperbolicTangent.Create() else + if S[0] = 'TNNetDropout' then Result := TNNetDropout.Create(1/St[0], St[1]) else + if S[0] = 'TNNetReshape' then Result := TNNetReshape.Create(St[0], St[1], St[2]) else + if S[0] = 'TNNetLayerFullConnect' then Result := TNNetFullConnect.Create(St[0], St[1], St[2], St[3]) else + if S[0] = 'TNNetFullConnect' then Result := TNNetFullConnect.Create(St[0], St[1], St[2], St[3]) else + if S[0] = 'TNNetFullConnectSigmoid' then Result := TNNetFullConnectSigmoid.Create(St[0], St[1], St[2], St[3]) else + if S[0] = 'TNNetFullConnectDiff' then Result := TNNetFullConnectDiff.Create(St[0], St[1], St[2], St[3]) else + if S[0] = 'TNNetLayerFullConnectReLU' then Result := TNNetFullConnectReLU.Create(St[0], St[1], St[2], St[3]) else + if S[0] = 'TNNetFullConnectReLU' then Result := TNNetFullConnectReLU.Create(St[0], St[1], St[2], St[3]) else + if S[0] = 'TNNetFullConnectLinear' then Result := 
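+  // editor's note: the FPC-only "case S[0] of ..." dispatch and the parallel
+  // Delphi if-chain removed above are collapsed into this single if-chain;
+  // FPC supports case-of-string but Delphi does not, and one shared chain
+  // means each layer type now has to be registered only once.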
TNNetFullConnectLinear.Create(St[0], St[1], St[2], St[3]) else + if S[0] = 'TNNetLocalConnect' then Result := TNNetLocalConnect.Create(St[0], St[1], St[2], St[3], St[4]) else + if S[0] = 'TNNetLocalProduct' then Result := TNNetLocalProduct.Create(St[0], St[1], St[2], St[3], St[4]) else + if S[0] = 'TNNetLocalConnectReLU' then Result := TNNetLocalConnectReLU.Create(St[0], St[1], St[2], St[3], St[4]) else + if S[0] = 'TNNetMulLearning' then Result := TNNetMulLearning.Create(St[0]) else + if S[0] = 'TNNetMulByConstant' then Result := TNNetMulByConstant.Create(St[0]) else + if S[0] = 'TNNetNegate' then Result := TNNetNegate.Create() else + if S[0] = 'TNNetLayerSoftMax' then Result := TNNetSoftMax.Create() else + if S[0] = 'TNNetSoftMax' then Result := TNNetSoftMax.Create() else + if S[0] = 'TNNetConvolution' then Result := TNNetConvolution.Create(St[0], St[1], St[2], St[3], St[4]) else + if S[0] = 'TNNetConvolutionReLU' then Result := TNNetConvolutionReLU.Create(St[0], St[1], St[2], St[3], St[4]) else + if S[0] = 'TNNetConvolutionGeLU' then Result := TNNetConvolutionGeLU.Create(St[0], St[1], St[2], St[3], St[4]) else + if S[0] = 'TNNetConvolutionSwish6' then Result := TNNetConvolutionSwish6.Create(St[0], St[1], St[2], St[3], St[4]) else + if S[0] = 'TNNetConvolutionSwish' then Result := TNNetConvolutionSwish.Create(St[0], St[1], St[2], St[3], St[4]) else + if S[0] = 'TNNetConvolutionLinear' then Result := TNNetConvolutionLinear.Create(St[0], St[1], St[2], St[3], St[4]) else + if S[0] = 'TNNetGroupedConvolutionLinear' then Result := TNNetGroupedConvolutionLinear.Create(St[0], St[1], St[2], St[3], St[5], St[4]) else + if S[0] = 'TNNetGroupedConvolutionReLU' then Result := TNNetGroupedConvolutionReLU.Create(St[0], St[1], St[2], St[3], St[5], St[4]) else + if S[0] = 'TNNetGroupedPointwiseConvLinear' then Result := TNNetGroupedPointwiseConvLinear.Create({pNumFeatures=}St[0], {pGroups=}St[5], {pSuppressBias=}St[4]) else + if S[0] = 'TNNetGroupedPointwiseConvReLU' then Result := TNNetGroupedPointwiseConvReLU.Create({pNumFeatures=}St[0], {pGroups=}St[5], {pSuppressBias=}St[4]) else + if S[0] = 'TNNetConvolutionSharedWeights' then Result := TNNetConvolutionSharedWeights.Create(FLayers[St[5]]) else + if S[0] = 'TNNetDepthwiseConv' then Result := TNNetDepthwiseConv.Create(St[0], St[1], St[2], St[3]) else + if S[0] = 'TNNetDepthwiseConvReLU' then Result := TNNetDepthwiseConvReLU.Create(St[0], St[1], St[2], St[3]) else + if S[0] = 'TNNetDepthwiseConvGeLU' then Result := TNNetDepthwiseConvGeLU.Create(St[0], St[1], St[2], St[3]) else + if S[0] = 'TNNetDepthwiseConvLinear' then Result := TNNetDepthwiseConvLinear.Create(St[0], St[1], St[2], St[3]) else + if S[0] = 'TNNetPointwiseConv' then Result := TNNetPointwiseConv.Create(St[0], St[4]) else + if S[0] = 'TNNetPointwiseConvReLU' then Result := TNNetPointwiseConvReLU.Create(St[0], St[4]) else + if S[0] = 'TNNetPointwiseConvLinear' then Result := TNNetPointwiseConvLinear.Create(St[0], St[4]) else + if S[0] = 'TNNetMaxPool' then Result := TNNetMaxPool.Create(St[0], St[1], St[2]) else + if S[0] = 'TNNetMaxPoolPortable' then Result := TNNetMaxPoolPortable.Create(St[0], St[1], St[2]) else + if S[0] = 'TNNetMinPool' then Result := TNNetMinPool.Create(St[0], St[1], St[2]) else + if S[0] = 'TNNetAvgPool' then Result := TNNetAvgPool.Create(St[0]) else + if S[0] = 'TNNetAvgChannel' then Result := TNNetAvgChannel.Create() else + if S[0] = 'TNNetMaxChannel' then Result := TNNetMaxChannel.Create() else + if S[0] = 'TNNetMinChannel' then Result := TNNetMinChannel.Create() 
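+  // editor's note: the newly registered names above (TNNetConvolutionGeLU,
+  // TNNetConvolutionSwish6, TNNetConvolutionSwish, TNNetDepthwiseConvGeLU)
+  // must match the class names that the network structure string stores;
+  // an unregistered name falls through to the raise at the end of this chain.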
else
+  if S[0] = 'TNNetConcat' then Result := TNNetConcat.Create(aL) else
+  if S[0] = 'TNNetInterleaveChannels' then Result := TNNetInterleaveChannels.Create(St[0]) else
+  if S[0] = 'TNNetDeepConcat' then Result := TNNetDeepConcat.Create(aL) else
+  if S[0] = 'TNNetSum' then Result := TNNetSum.Create(aL) else
+  if S[0] = 'TNNetSplitChannels' then Result := TNNetSplitChannels.Create(aIdx) else
+  if S[0] = 'TNNetSplitChannelEvery' then Result := TNNetSplitChannelEvery.Create(aIdx) else
+  if S[0] = 'TNNetDeLocalConnect' then Result := TNNetDeLocalConnect.Create(St[0], St[1], St[4]) else
+  if S[0] = 'TNNetDeLocalConnectReLU' then Result := TNNetDeLocalConnectReLU.Create(St[0], St[1], St[4]) else
+  if S[0] = 'TNNetDeconvolution' then Result := TNNetDeconvolution.Create(St[0], St[1], St[4]) else
+  if S[0] = 'TNNetDeconvolutionReLU' then Result := TNNetDeconvolutionReLU.Create(St[0], St[1], St[4]) else
+  if S[0] = 'TNNetDeMaxPool' then Result := TNNetDeMaxPool.Create(St[0], St[7]) else
+  if S[0] = 'TNNetDeAvgPool' then Result := TNNetDeAvgPool.Create(St[0]) else
+  if S[0] = 'TNNetUpsample' then Result := TNNetUpsample.Create() else
+  if S[0] = 'TNNetLayerMaxNormalization' then Result := TNNetLayerMaxNormalization.Create() else
+  if S[0] = 'TNNetLayerStdNormalization' then Result := TNNetLayerStdNormalization.Create() else
+  if S[0] = 'TNNetMovingStdNormalization' then Result := TNNetMovingStdNormalization.Create() else
+  if S[0] = 'TNNetChannelStdNormalization' then Result := TNNetChannelStdNormalization.Create() else
+  if S[0] = 'TNNetScaleLearning' then Result := TNNetScaleLearning.Create() else
+  if S[0] = 'TNNetChannelBias' then Result := TNNetChannelBias.Create() else
+  if S[0] = 'TNNetChannelMul' then Result := TNNetChannelMul.Create() else
+  if S[0] = 'TNNetChannelMulByLayer' then Result := TNNetChannelMulByLayer.Create(St[0], St[1]) else
+  if S[0] = 'TNNetCellBias' then Result := TNNetCellBias.Create() else
+  if S[0] = 'TNNetCellMul' then Result := TNNetCellMul.Create() else
+  if S[0] = 'TNNetCellMulByCell' then Result := TNNetCellMulByCell.Create(St[0], St[1]) else
+  if S[0] = 'TNNetRandomMulAdd' then Result := TNNetRandomMulAdd.Create(St[0], St[1]) else
+  if S[0] = 'TNNetChannelRandomMulAdd' then Result := TNNetChannelRandomMulAdd.Create(St[0], St[1]) else
+  if S[0] = 'TNNetChannelZeroCenter' then Result := TNNetChannelZeroCenter.Create() else
+  if S[0] = 'TNNetLocalResponseNorm2D' then Result := TNNetLocalResponseNorm2D.Create(St[0]) else
+  if S[0] = 'TNNetLocalResponseNormDepth' then Result := TNNetLocalResponseNormDepth.Create(St[0]) else
+  if S[0] = 'TNNetAddAndDiv' then Result := TNNetAddAndDiv.Create(St[0], St[1]) else
+  raise Exception.create(strData + ' not allowed in CreateLayer.');
   end
   else
   begin
@@ -10775,8 +10754,10 @@ procedure TNNet.Compute(pInput, pOutput: TNNetVolumeList; FromLayerIdx: integer
 var
   AuxOutput: TNNetVolume;
   MaxIdxInput, IdxInput: integer;
+  cancel : boolean;
 begin
   MaxIdxInput := pInput.Count - 1;
+  cancel := False;
   if MaxIdxInput >=0 then
   begin
     AuxOutput := TNNetVolume.Create();
@@ -10798,6 +10779,14 @@ procedure TNNet.Compute(pInput, pOutput: TNNetVolumeList; FromLayerIdx: integer
       begin
         MessageProc(IntToStr(IdxInput)+' processed.');
       end;
+
+      if Assigned(fProgress) then
+      begin
+        fProgress( self, idxInput, maxIdxInput + 1, cancel);
+        // check if the user wants to stop the loop
+        if cancel then
+          break;
+      end;
     end;
     AuxOutput.Free;
   end;
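+      // editor's note: fProgress is not declared in this patch; from the
+      // call site above it is presumably an event along the lines of
+      //   TNNetProgressEvent = procedure(Sender: TObject;
+      //     Current, Total: integer; var Cancel: boolean) of object;
+      // (name hypothetical) - Cancel is passed by reference so a handler
+      // can abort the batch loop.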
@@ -11061,7 +11050,9 @@ procedure TNNet.CopyWeights(Origin: TNNet);

 function TNNet.ForceMaxAbsoluteDelta(vMax: TNeuralFloat): TNeuralFloat;
 var
   LayerCnt: integer;
+  {$IFDEF Debug}
   LayerMul: TNeuralFloat;
+  {$ENDIF}
 begin
   Result := 1;
   if FLayers.Count > 0 then
@@ -11070,8 +11061,11 @@ function TNNet.ForceMaxAbsoluteDelta(vMax: TNeuralFloat): TNeuralFloat;
     begin
       if not(FLayers[LayerCnt].LinkedNeurons) then
       begin
-        LayerMul := FLayers[LayerCnt].ForceMaxAbsoluteDelta(vMax);
+        {$IFNDEF Debug}
+        FLayers[LayerCnt].ForceMaxAbsoluteDelta(vMax);
+        {$ENDIF}
         {$IFDEF Debug}
+        LayerMul := FLayers[LayerCnt].ForceMaxAbsoluteDelta(vMax);
         if LayerMul < Result then
         begin
           Result := LayerMul;
@@ -11178,6 +11172,9 @@ procedure TNNet.EnableOpenCL(platform_id: cl_platform_id;
   LayerCnt: integer;
 begin
   FDotProductKernel := TDotProductCL.Create(platform_id, device_id);
+  FDotProductKernel.MessageProc := Self.MessageProc;
+  FDotProductKernel.ErrorProc := self.ErrorProc;
+  FDotProductKernel.Prepare;
   for LayerCnt := 0 to GetLastLayerIdx() do
   begin
     FLayers[LayerCnt].EnableOpenCL(FDotProductKernel);
@@ -11678,13 +11675,29 @@ procedure TNNet.LoadFromFile(filename: string);
 end;

 function TNNet.Clone(): TNNet;
-var
-  NNData: String;
+//var NNData: String;
+//begin
+//  NNData := SaveToString();
+//
+//  Result := TNNet.Create;
+//  Result.LoadFromString(NNData);
+//end;
+
+var i : integer;
+    layStruct : string;
 begin
-  NNData := SaveToString();
+  // it's not optimal but it covers the basics
+  layStruct := SaveStructureToString();
+
+  Result := TNNet.Create;
+  Result.LoadStructureFromString(layStruct);

-  Result := TNNet.Create;
-  Result.LoadFromString(NNData);
+  for i := 0 to FLayers.Count - 1 do
+  begin
+    // copy the weights; this reproduces LoadDataFromString but without all the overhead
+    Result.fLayers[i].Assign( FLayers[i] );
+    Result.FLayers[i].AfterWeightUpdate;
+  end;
 end;

 procedure TNNet.LoadDataFromString(strData: string);
@@ -11822,6 +11835,45 @@ procedure TNNetLayer.ApplyActivationFunctionToOutput();
   end;
 end;

+procedure TNNetLayer.Assign(layer: TNNetLayer);
+var i : integer;
+begin
+  // ###########################################
+  // #### Assign all elements common with the layer object
+  //FStruct := layer.FStruct;
+//  fOutput.Copy(layer.FOutput);
+//  FOutputRaw.Copy(layer.FOutputRaw);
+//  FOutputError.Copy(layer.FOutputError);
+//  FOutputErrorDeriv.Copy(FOutputErrorDeriv);
+//  FSuppressBias := layer.FSuppressBias;
+//
+//  FNeurons := TNNetNeuronList.Create();
+//  for i := 0 to layer.FNeurons.Count - 1 do
+//    fNeurons.Add(layer.FNeurons[i].Clone);
+//
+//  AfterWeightUpdate;
+
+  assert( layer.FNeurons.Count = FNeurons.Count, 'neuron count does not match');
+  for i := 0 to layer.FNeurons.Count - 1 do
+    TNNetNeuron(fNeurons[i]).Assign( layer.FNeurons[i] );
+
+  //FLinkedNeurons := layer.FLinkedNeurons;
+//  FActivationFn := layer.FActivationFn;
+//  FActivationFnDerivative := layer.FActivationFnDerivative;
+//  FLearningRate := layer.FLearningRate;
+//  FL2Decay := layer.FL2Decay;
+//  //FPrevLayer := nil;
+//  FInertia := layer.FInertia;
+//  FBatchUpdate := layer.FBatchUpdate;
+//  FSmoothErrorPropagation := layer.FSmoothErrorPropagation;
+//  FDepartingBranchesCnt := layer.FDepartingBranchesCnt;
+//  FBackPropCallCurrentCnt := layer.FBackPropCallCurrentCnt;
+//  FBackwardTime := 0;
+//  FForwardTime := 0;
+end;

 procedure TNNetLayer.BuildArrNeurons();
 var
   NeuronIdx: integer;
@@ -13050,6 +13102,17 @@ procedure TNNetNeuron.ClearDelta;
   FBiasDelta := 0;
 end;

+procedure TNNetNeuron.Assign(neuron: TNNetNeuron);
+begin
+  FWeights.Copy( neuron.fWeights );
+  FBackInertia.Copy(neuron.fBackInertia);
+  FDelta.Copy(neuron.FDelta);
+
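+  // deep copy of the trainable state: weight vector, inertia and pending
+  // delta above, the bias scalars right below. Both Assign routines assume
+  // source and target share the same architecture (enforced by the assert
+  // in TNNetLayer.Assign), which is what lets the reworked TNNet.Clone
+  // skip the string round trip entirely.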
FBiasWeight := neuron.fBiasWeight; + FBiasInertia := neuron.FBiasInertia; + FBiasDelta := neuron.FBiasDelta; +end; + constructor TEasyBytePredictionViaNNet.Create(pActionByteLen, pStateByteLen: word; NumNeurons: integer; CacheSize: integer); @@ -13124,10 +13187,11 @@ procedure TBytePredictionViaNNet.Predict(var pActions, begin ABCopy(aActions, pActions); ABCopy(aCurrentState, pCurrentState); + idxCache := -1; if FUseCache then idxCache := FCache.Read(pActions, pPredictedState); Equal := ABCmp(pActions, pCurrentState); - if FUseCache and (idxCache <> -1) and Equal then + if (idxCache <> -1) and Equal then begin FCached := True; end diff --git a/neural/neuralopencl.pas b/neural/neuralopencl.pas index c2617e46..71c88250 100644 --- a/neural/neuralopencl.pas +++ b/neural/neuralopencl.pas @@ -35,7 +35,7 @@ interface uses - Classes, SysUtils, cl, {$IFDEF FPC}ctypes{$ELSE}Winapi.Windows,AnsiStrings,CL_Platform{$ENDIF}, neuralvolume; + Classes, SysUtils, cl, {$IFDEF FPC}ctypes{$ELSE}Windows,AnsiStrings,CL_Platform{$ENDIF}, neuralvolume; type {$IFDEF FPC} @@ -84,14 +84,14 @@ TEasyOpenCL = class(TMObject) FCompilerOptions: ShortString; {$ENDIF} - procedure LoadPlatforms(); procedure FreeContext(); procedure CompileProgram(); overload; public - constructor Create(); override; + constructor Create(); //override; destructor Destroy(); override; + procedure LoadPlatforms(); procedure printDevicesInfo(); function GetPlatformCount(): integer; function GetDeviceCount(): integer; @@ -160,11 +160,13 @@ TEasyOpenCLV = class (TEasyOpenCL) TNeuralKernel = class(TEasyOpenCLV) private + fkernelname : string; /// OpenCL Kernel FKernel: cl_kernel; function PrepareKernel(kernelname: string = 'cai_dot_product'): integer; procedure UnprepareKernel(); public + procedure Prepare; constructor Create(pCurrentPlatform: cl_platform_id; pCurrentDevice: cl_device_id; kernelname: string = 'cai_dot_product'); destructor Destroy(); override; @@ -396,7 +398,14 @@ function TDotProductSharedKernel.PrepareForCompute(VAs, VBs: TNNetVolume; FResultBuffer := FDotProductKernel.CreateOutputBuffer(FNumAs * FNumBs * SizeOf(TNeuralFloat)); FPreviousComputeTime := 0; - PrepareForCompute := CL_SUCCESS; + if Assigned(FResultBuffer) and Assigned(FInputBufferAs) and Assigned(FInputBufferBs) + then + PrepareForCompute := CL_SUCCESS + else + begin + UnprepareForCompute; + Result := CL_INVALID_MEM_OBJECT; + end; end; procedure TDotProductSharedKernel.Compute @@ -550,6 +559,39 @@ procedure TDotProductSharedKernel.FinishAndLoadResult(Results: TNNetVolume; end; end; +procedure TNeuralKernel.Prepare; +var resStream : TResourceStream; +begin + // ########################################### + // #### Check if the neural.cl file is part of the resources + try + resStream := TResourceStream.Create(hInstance, 'NeuralCL', RT_RCDATA); + FOpenCLProgramSource.LoadFromStream(resStream, TEncoding.UTF8); + + resStream.Free; + CompileProgram(); + PrepareKernel(fkernelname); + exit; + except + MessageProc('Resource NeuralCL not found - try to open file...'); + end; + + // Create the OpenCL Kernel Here: + if FileExists('../../../neural/neural.cl') then + begin + CompileProgramFromFile('../../../neural/neural.cl'); + end + else if FileExists('neural.cl') then + begin + CompileProgramFromFile('neural.cl'); + end + else + begin + MessageProc('File neural.cl could not be found.'); + end; + PrepareKernel(fkernelname); +end; + function TNeuralKernel.PrepareKernel(kernelname: string): integer; begin UnprepareKernel(); @@ -569,21 +611,7 @@ constructor 
TNeuralKernel.Create(pCurrentPlatform: cl_platform_id; inherited Create(); SetCurrentPlatform(pCurrentPlatform); SetCurrentDevice(pCurrentDevice); - - // Create the OpenCL Kernel Here: - if FileExists('../../../neural/neural.cl') then - begin - CompileProgramFromFile('../../../neural/neural.cl'); - end - else if FileExists('neural.cl') then - begin - CompileProgramFromFile('neural.cl'); - end - else - begin - MessageProc('File neural.cl could not be found.'); - end; - PrepareKernel(kernelname); + fkernelname := kernelname; end; destructor TNeuralKernel.Destroy(); @@ -834,6 +862,12 @@ function TEasyOpenCLV.WriteBuffer(buffer: cl_mem; V: TNNetVolume; blocking: cl_b function TEasyOpenCLV.ReadBuffer(buffer: cl_mem; V: TNNetVolume; blocking: cl_bool): integer; begin Result := ReadBuffer(buffer, V.GetMemSize(), V.DataPtr, blocking); + + if Result <> CL_SUCCESS then + begin + FErrorProc(Format( 'Error: %p, %p, %d', [buffer, V.DataPtr, V.GetMemSize ])); + end; + end; function TEasyOpenCLV.CreateAndWriteBuffer(V: TNNetVolume; var buffer: cl_mem @@ -899,11 +933,15 @@ procedure TEasyOpenCL.LoadPlatforms(); begin {$IFDEF FPC} err := clGetPlatformInfo(local_platformids[i], CL_PLATFORM_NAME, sizeof(buf), @buf, bufwritten); - FPlatformNames[i] := buf; + if err <> CL_SUCCESS then + FErrorProc('ERROR: ' + GetString(err) ); + FPlatformNames[i] := string(buf); FPlatformIds[i] := local_platformids[i]; {$ELSE} err := clGetPlatformInfo(local_platformids^, CL_PLATFORM_NAME, sizeof(buf), @buf, @bufwritten); - FPlatformNames[i] := buf; + if err <> CL_SUCCESS then + FErrorProc('ERROR: ' + String(GetString(err) ) ); + FPlatformNames[i] := string(buf); FPlatformIds[i] := local_platformids^; Inc(local_platformids); {$ENDIF} @@ -933,7 +971,11 @@ procedure TEasyOpenCL.CompileProgram(); {$IFDEF FPC} localKernelSource := FOpenCLProgramSource.GetText(); {$ELSE} + {$if CompilerVersion >= 23} localKernelSource := AnsiStrings.StrNew(PAnsiChar(AnsiString(FOpenCLProgramSource.Text))); + {$ELSE} + localKernelSource := PAnsiChar(AnsiString(FOpenCLProgramSource.Text)); + {$IFEND} {$ENDIF} // Create a compute context @@ -965,7 +1007,7 @@ procedure TEasyOpenCL.CompileProgram(); {$ENDIF} if FProg = nil then begin - FMessageProc(localKernelSource); + FMessageProc(String(localKernelSource)); FErrorProc('Error: Failed to create compute program:' + IntToStr(err)); exit; end @@ -973,19 +1015,19 @@ procedure TEasyOpenCL.CompileProgram(); FMessageProc('clCreateProgramWithSource OK!'); localCompilerOptions := {$IFDEF FPC}StrAlloc{$ELSE}AnsiStrAlloc{$ENDIF}(length(FCompilerOptions)+1); - {$IFDEF FPC}StrPCopy{$ELSE}AnsiStrings.StrPCopy{$ENDIF}(localCompilerOptions,FCompilerOptions); + {$IFDEF FPC}StrPCopy{$ELSE} {$IF CompilerVersion >= 23}AnsiStrings.StrPCopy{$ELSE}StrPCopy{$IFEND} {$ENDIF}(localCompilerOptions,FCompilerOptions); // Build the program executable err := clBuildProgram(FProg, 0, nil, localCompilerOptions, nil, nil); - {$IFDEF FPC}StrDispose{$ELSE}AnsiStrings.StrDispose{$ENDIF}(localCompilerOptions); + {$IFDEF FPC}StrDispose{$ELSE}{$IF CompilerVersion >= 23}AnsiStrings.StrDispose{$ELSE}StrDispose{$IFEND}{$ENDIF}(localCompilerOptions); if (err <> CL_SUCCESS) then begin errorlog := @errorlogstr[1]; loglen := SizeOf(errorlogstr); clGetProgramBuildInfo(FProg, FCurrentDevice, CL_PROGRAM_BUILD_LOG, SizeOf(errorlogstr), errorlog, {$IFDEF FPC}loglen{$ELSE}@loglen{$ENDIF}); - FErrorProc('Error: Failed to build program executable:' + IntToStr(err) + ' ' + errorlog); + FErrorProc('Error: Failed to build program executable:' + IntToStr(err) + 
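+      // editor's note: the build log fetched just above through
+      // clGetProgramBuildInfo(CL_PROGRAM_BUILD_LOG) is surfaced via
+      // FErrorProc - one reason kernel compilation is now deferred to
+      // Prepare, which TNNet.EnableOpenCL calls only after wiring
+      // MessageProc/ErrorProc to the kernel object.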
' ' + String(errorlog));
     exit;
   end
   else
@@ -1009,7 +1051,7 @@ procedure TEasyOpenCL.printDevicesInfo();
     for k := low(platform_str_info) to high(platform_str_info) do
     begin
       clGetPlatformInfo(FPlatformIds[i], platform_str_info[k].id, sizeof(buf), @buf, {$IFDEF FPC}bufwritten{$ELSE}@bufwritten{$ENDIF});
-      MessageProc(platform_str_info[k].Name + ': ' + buf);
+      MessageProc(platform_str_info[k].Name + ': ' + String(buf));
     end;

     GetDevicesFromPlatform(FPlatformIds[i], local_devices, local_deviceids);
@@ -1022,7 +1064,7 @@ procedure TEasyOpenCL.printDevicesInfo();
       for k := low(device_str_info) to high(device_str_info) do
       begin
         clGetDeviceInfo(local_deviceids[j], device_str_info[k].id, sizeof(buf), @buf, {$IFDEF FPC}bufwritten{$ELSE}@bufwritten{$ENDIF});
-        MessageProc(device_str_info[k].Name + ': ' + buf);
+        MessageProc(device_str_info[k].Name + ': ' + String(buf));
       end;

       for k := low(device_word_info) to high(device_word_info) do
@@ -1070,22 +1112,32 @@ procedure TEasyOpenCL.GetDevicesFromPlatform(PlatformId: cl_platform_id; out pDe
   firstpointer := local_deviceids;
   err := clGetDeviceIDs(PlatformId, CL_DEVICE_TYPE_ALL, local_devices, local_deviceids, nil);

-  if (local_devices > 0) then
+  if err = CL_SUCCESS then
   begin
-    for j := 0 to local_devices - 1 do
+    if (local_devices > 0) then
     begin
-      {$IFDEF FPC}
-      err := clGetDeviceInfo(local_deviceids[j], CL_DEVICE_NAME, sizeof(buf), @buf, bufwritten);
-      pDeviceNames[j] := buf;
-      pDevices[j] := local_deviceids[j];
-      {$ELSE}
-      err := clGetDeviceInfo(local_deviceids^, CL_DEVICE_NAME, sizeof(buf), @buf, @bufwritten);
-      pDeviceNames[j] := buf;
-      pDevices[j] := local_deviceids^;
-      Inc(local_deviceids);
-      {$ENDIF}
+      for j := 0 to local_devices - 1 do
+      begin
+        {$IFDEF FPC}
+        err := clGetDeviceInfo(local_deviceids[j], CL_DEVICE_NAME, sizeof(buf), @buf, bufwritten);
+        if err <> CL_SUCCESS then
+          FErrorProc('ERROR: ' + GetString(err));
+        pDeviceNames[j] := buf;
+        pDevices[j] := local_deviceids[j];
+        {$ELSE}
+        err := clGetDeviceInfo(local_deviceids^, CL_DEVICE_NAME, sizeof(buf), @buf, @bufwritten);
+        if err <> CL_SUCCESS then
+          FErrorProc('ERROR: ' + String(GetString(err)));
+        pDeviceNames[j] := String(buf);
+        pDevices[j] := local_deviceids^;
+        Inc(local_deviceids);
+        {$ENDIF}
+      end;
     end;
-  end;
+  end
+  else
+    FErrorProc('ERROR: ' + String(GetString( err ) ));

   freemem(firstpointer);
 end;
@@ -1224,7 +1276,7 @@ function TEasyOpenCL.CreateKernel(kernelname: string): cl_kernel;
 begin
   err := 0;
   localKernelName := {$IFDEF FPC}StrAlloc{$ELSE}AnsiStrAlloc{$ENDIF}(length(kernelname)+1);
-  {$IFDEF FPC}StrPCopy{$ELSE}AnsiStrings.StrPCopy{$ENDIF}(localKernelName,kernelname);
+  {$IFDEF FPC}StrPCopy{$ELSE}{$IF CompilerVersion >= 23}AnsiStrings.StrPCopy{$ELSE}StrPCopy{$IFEND}{$ENDIF}(localKernelName,AnsiString(kernelname));

   // Create the compute kernel in the program we wish to run
   Result := clCreateKernel(prog, localKernelName, {$IFDEF FPC}err{$ELSE}@err{$ENDIF});
@@ -1236,7 +1288,7 @@ function TEasyOpenCL.CreateKernel(kernelname: string): cl_kernel;
   begin
     FMessageProc('clCreateKernel '+kernelname+' OK!');
   end;
-  {$IFDEF FPC}StrDispose{$ELSE}AnsiStrings.StrDispose{$ENDIF}(localKernelName);
+  {$IFDEF FPC}StrDispose{$ELSE}{$IF CompilerVersion >= 23}AnsiStrings.StrDispose{$ELSE}StrDispose{$IFEND} {$ENDIF}(localKernelName);
 end;

 function TEasyOpenCL.RunKernel(pkernel: cl_kernel; ThreadCount: integer): integer;
@@ -1369,7 +1421,6 @@ constructor TEasyOpenCL.Create();
   MessageProc := Self.DefaultMessageProc;
   ErrorProc := Self.DefaultErrorProc;
   {$ENDIF}
-  LoadPlatforms();
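+  // editor's note: with the LoadPlatforms call removed from the constructor
+  // (the method is now public), callers enumerate platforms explicitly.
+  // A minimal sketch, assuming a user-supplied handler (MyErrorHandler is
+  // hypothetical):
+  //   cl := TEasyOpenCLV.Create();
+  //   cl.ErrorProc := MyErrorHandler; // wire handlers first...
+  //   cl.LoadPlatforms();             // ...then enumerate, errors get reported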
SetLength(FDeviceNames, 0); SetLength(FDevices, 0); diff --git a/neural/neuralthread.pas b/neural/neuralthread.pas index 550ed06c..59761766 100644 --- a/neural/neuralthread.pas +++ b/neural/neuralthread.pas @@ -169,7 +169,7 @@ procedure CreateNeuralThreadListIfRequired(); begin if Not(Assigned(vNTL)) then begin - NeuralThreadListCreate(TThread.ProcessorCount); + NeuralThreadListCreate(System.CPUCount); end; end; @@ -178,7 +178,7 @@ function NeuralDefaultThreadCount: integer; {$IFDEF FPC} Result := GetSystemThreadCount; {$ELSE} - Result := TThread.ProcessorCount; + Result := System.CPUCount; {$ENDIF} end; diff --git a/neural/neuralvolume.pas b/neural/neuralvolume.pas index 32e9ad62..c52ba1d9 100644 --- a/neural/neuralvolume.pas +++ b/neural/neuralvolume.pas @@ -201,7 +201,7 @@ TVolume = class(TObject) procedure CopyChannels(Original: TVolume; aChannels: array of integer); procedure Define(Original: array of T); function DotProduct(Original: TVolume): T; overload; {$IFDEF Release} inline; {$ENDIF} - class function DotProduct(PtrA, PtrB: TNeuralFloatArrPtr; NumElements: integer): Single; overload; {$IFDEF Release} inline; {$ENDIF} + class function DotProduct(PtrA, PtrB: TNeuralFloatArrPtr; NumElements: integer): Single; overload; class function Product(PtrA: TNeuralFloatArrPtr; NumElements: integer): Single; overload; {$IFDEF Release} inline; {$ENDIF} function SumDiff(Original: TVolume): T; {$IFDEF Release} inline; {$ENDIF} procedure DebugDiff(Original: TVolume; Limit: Single = 0); @@ -321,9 +321,9 @@ TNNetVolume = class (TVolume) procedure DotProductsTiled(NumAs, NumBs, VectorSize: integer; VAs, VBs: TNNetVolume; TileSizeA, TileSizeB: integer); procedure GroupedDotProductsTiled(Groups, NumAs, NumBs, VectorSize: integer; VAs, VBs: TNNetVolume; TileSizeA, TileSizeB: integer); procedure AddArea(DestX, DestY, OriginX, OriginY, LenX, LenY: integer; Original: TNNetVolume); - function HasAVX: boolean; {$IFDEF Release} inline; {$ENDIF} - function HasAVX2: boolean; {$IFDEF Release} inline; {$ENDIF} - function HasAVX512: boolean; {$IFDEF Release} inline; {$ENDIF} + function HasAVX: boolean; + function HasAVX2: boolean; + function HasAVX512: boolean; function PearsonCorrelation(Y : TNNetVolume): TNeuralFloat; procedure AddSumChannel(Original: TNNetVolume); {$IFDEF Release} inline; {$ENDIF} procedure AddSumSqrChannel(Original: TNNetVolume); {$IFDEF Release} inline; {$ENDIF} @@ -375,8 +375,7 @@ TNNetVolumePair = class(TObject) FB: TNNetVolume; public constructor Create(); overload; - constructor Create(pA, pB: TNNetVolume); overload; - constructor CreateCopying(pA, pB: TNNetVolume); overload; + constructor Create(pA, pB: TNNetVolume; createCopy : boolean = False); overload; destructor Destroy(); override; @@ -391,9 +390,8 @@ TMObject = class(TObject) protected FMessageProc: TGetStrProc; FErrorProc: TGetStrProc; - public - constructor Create(); virtual; + constructor Create(); //virtual; destructor Destroy(); override; procedure DefaultMessageProc(const S: string); @@ -401,7 +399,6 @@ TMObject = class(TObject) procedure DefaultHideMessages(const S: string); procedure HideMessages(); - published property MessageProc: TGetStrProc read FMessageProc write FMessageProc; property ErrorProc: TGetStrProc read FErrorProc write FErrorProc; end; @@ -558,6 +555,14 @@ TNNetDictionary = class(TStringListInt) function ReLULeakyBound(x: TNeuralFloat): TNeuralFloat; function ReLULeakyBoundDerivative(x: TNeuralFloat): TNeuralFloat; + function GaussErrorLinUnit(x : TNeuralFloat) : TNeuralFloat; + function 
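+ // editor's note on the declarations here: GELU(x) = x*Phi(x), with Phi the
+ // standard normal CDF, approximated in the implementation below as
+ //   0.5*x*(1 + tanh(sqrt(2/pi)*(x + 0.044715*x^3)));
+ // Swish(x) = x*sigmoid(x), with derivative
+ //   Swish'(x) = Swish(x) + sigmoid(x)*(1 - Swish(x)),
+ // which is exactly what the Swish implementations further down compute.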
GaussErrorLinUnitDerivative(x : TNeuralFloat) : TNeuralFloat;
+
+  function Swish6Unit(x : TNeuralFloat) : TNeuralFloat;
+  function Swish6Derivative(x : TNeuralFloat) : TNeuralFloat;
+  function SwishUnit(x : TNeuralFloat) : TNeuralFloat;
+  function SwishDerivative(x : TNeuralFloat) : TNeuralFloat;
+
 function Sigmoid(x: TNeuralFloat): TNeuralFloat;
 function SigmoidDerivative(x: TNeuralFloat): TNeuralFloat;
@@ -599,8 +604,15 @@ TNNetDictionary = class(TStringListInt)

 implementation

-uses
-  Math, neuralbit;
+{$IFDEF CPUX64}
+{$DEFINE x64}
+{$ENDIF}
+{$IFDEF cpux86_64}
+{$DEFINE x64}
+{$ENDIF}
+
+uses {$IFNDEF x64} Neural.AVX {$ELSE} Neural.AVXx64{$ENDIF}, neuralbit,
+     Math, CPUFeatures;

 function CreateTokenizedStringList(str: string; c:char):TStringList;
 begin
@@ -1500,6 +1512,109 @@ function RectifiedLinearUnitDerivative(x: TNeuralFloat): TNeuralFloat;
   else Result := 0;
 end;

+// paper: Gaussian Error Linear Units (GELUs), Hendrycks & Gimpel (2016)
+function GaussErrorLinUnit(x : TNeuralFloat) : TNeuralFloat;
+const cSqrt_2_pi = 0.797884560803;
+begin
+     // clamp to the asymptotic values outside these bounds
+     if x > 6
+     then
+         Result := x
+     else if x < -4
+     then
+         Result := 0
+     else
+         Result := 0.5*x*(1 + tanh( cSqrt_2_pi*( x + 0.044715*x*x*x)));
+end;
+
+function GaussErrorLinUnitDerivative(x : TNeuralFloat) : TNeuralFloat;
+begin
+     // from https://mlfromscratch.com/activation-functions-explained/#/
+     // with the derivative clamped to its asymptotes for large |x|
+     if x > 6.5
+     then
+         Result := 1
+     else if x < -5
+     then
+         Result := 0
+     else
+         Result := 0.5 + 0.5*tanh(0.0356774*x*x*x + 0.797885 * x) + (0.0535161*x*x*x + 0.398942*x)*sqr( sech(0.0356774*x*x*x + 0.797885*x) )
+end;
+
+function Swish6Unit(x : TNeuralFloat) : TNeuralFloat;
+begin
+     if x < -6 then
+     begin
+          Result := 0;
+     end
+     else if x < 6 then
+     begin
+          Result := x* (1 / ( 1 + Exp(-x) ));
+     end
+     else
+     begin
+          // max out at 6
+          Result := 6;
+     end;
+end;
+
+function Swish6Derivative(x : TNeuralFloat) : TNeuralFloat;
+var sigmoidValue : TNeuralFloat;
+begin
+     if x < -6 then
+     begin
+          Result := 0;
+     end
+     else if x < 6 then
+     begin
+          sigmoidValue := (1 / ( 1 + Exp(-x) ));
+          Result := x*sigmoidValue;
+          Result := Result + sigmoidValue*(1 - Result);
+     end
+     else
+     begin
+          // maxed out at 6, so the derivative of the constant part is 0
+          Result := 0;
+     end;
+end;
+
+function SwishUnit(x : TNeuralFloat) : TNeuralFloat;
+begin
+     if x < -6 then
+     begin
+          Result := 0;
+     end
+     else if x < 6 then
+     begin
+          Result := x* (1 / ( 1 + Exp(-x) ));
+     end
+     else
+     begin
+          // sigmoid(x) is ~1 here, so Swish(x) ~ x
+          Result := x;
+     end;
+end;
+
+function SwishDerivative(x : TNeuralFloat) : TNeuralFloat;
+var sigmoidValue : TNeuralFloat;
+begin
+     if x < -6 then
+     begin
+          Result := 0;
+     end
+     else if x < 6 then
+     begin
+          sigmoidValue := (1 / ( 1 + Exp(-x) ));
+          Result := x*sigmoidValue;
+          Result := Result + sigmoidValue*(1 - Result);
+     end
+     else
+     begin
+          // Swish(x) ~ x here, so the derivative approaches 1
+          Result := 1;
+     end;
+end;
+
 constructor TNNetVolumePair.Create();
 begin
   inherited Create();
@@ -1507,20 +1622,22 @@ constructor TNNetVolumePair.Create();
   FB := TNNetVolume.Create();
 end;

-constructor TNNetVolumePair.Create(pA, pB: TNNetVolume);
+constructor TNNetVolumePair.Create(pA, pB: TNNetVolume; createCopy : boolean = False);
 begin
-  inherited Create();
-  FA := pA;
-  FB := pB;
-end;
+  inherited Create;

-constructor TNNetVolumePair.CreateCopying(pA, pB: TNNetVolume);
-begin
-  inherited Create();
-  FA := TNNetVolume.Create(pA);
-  FB := TNNetVolume.Create(pB);
-  FA.Copy(pA);
-  FB.Copy(pB);
+  if createCopy then
+  begin
+    FA := TNNetVolume.Create(pA);
+    FB := TNNetVolume.Create(pB);
+    FA.Copy(pA);
+    FB.Copy(pB);
+  end
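+  // createCopy=True reproduces the removed CreateCopying constructor (the
+  // pair holds independent deep copies); createCopy=False - the default -
+  // keeps the old Create behaviour and stores the caller's references. Sketch:
+  //   pair := TNNetVolumePair.Create(vA, vB);        // shares vA/vB
+  //   snap := TNNetVolumePair.Create(vA, vB, True);  // private copies
+  // (who frees shared volumes depends on the destructor, not shown here)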
+ else + begin + FA := pA; + FB := pB; + end; end; destructor TNNetVolumePair.Destroy(); @@ -1988,7 +2105,6 @@ function TNNetVolumeList.GetAvg(): TNeuralFloat; procedure TNNetVolumeList.AddValue(Value: TNeuralFloat); var I: integer; - AuxVolume: TNNetVolume; begin if (Count>0) then begin @@ -2002,7 +2118,6 @@ procedure TNNetVolumeList.AddValue(Value: TNeuralFloat); procedure TNNetVolumeList.Divi(Value: TNeuralFloat); var I: integer; - AuxVolume: TNNetVolume; begin if (Count>0) then begin @@ -2476,6 +2591,7 @@ function TVolume.RandomGaussianValue(): TNeuralFloat; r, x, y: TNeuralFloat; begin r := 0; + x := 0; // loop executed 4 / pi = 1.273.. times on average while ( (r > 1) or (r = 0) ) do begin @@ -3017,7 +3133,9 @@ class procedure TVolume.MulAddPPVS(PtrA, PtrB: TNeuralFloatArrPtr; Value: T; I: integer; vHigh: integer; BasePos: integer; + {$IFDEF FPC} AddrA, AddrB: TNeuralFloatPtr; + {$ENDIF} begin BasePos := 0; vHigh := pSize - 1; @@ -3070,7 +3188,9 @@ class procedure TVolume.MulMulAdd(PtrA, PtrB: TNeuralFloatArrPtr; Value1, I: integer; vHigh: integer; BasePos: integer; + {$IFDEF FPC} AddrA, AddrB: TNeuralFloatPtr; + {$ENDIF} begin BasePos := 0; vHigh := pSize - 1; @@ -3120,14 +3240,16 @@ class procedure TVolume.MulAdd(PtrA, PtrB, PtrC: TNeuralFloatArrPtr; I: integer; vHigh: integer; BasePos: integer; + {$IFDEF FPC} AddrA, AddrB, AddrC: TNeuralFloatPtr; + {$ENDIF} begin BasePos := 0; + vHigh := pSize - 1; + {$IFDEF FPC} AddrA := pointer(PtrA); AddrB := pointer(PtrB); AddrC := pointer(PtrC); - vHigh := pSize - 1; - {$IFDEF FPC} while BasePos <= vHigh - 7 do begin (AddrA)^ := (AddrA)^ + (AddrB)^ * (AddrC)^; @@ -4946,10 +5068,13 @@ procedure TVolume.LoadFromString(strData: string); I: integer; AuxFloat: Single; begin - version := 1; S := CreateTokenizedStringList(strData,';'); version := StrToInt(S[0]); + + if version <> 1 then + raise Exception.Create('Error V' + IntToStr(version) + ' found but V1.0 expected'); + pSizeX := StrToInt(S[1]); pSizeY := StrToInt(S[2]); pDepth := StrToInt(S[3]); @@ -5162,11 +5287,12 @@ procedure TNNetVolume.InterleavedDotProduct(InterleavedAs, Bs: TNNetVolume; procedure TNNetVolume.DotProducts(NumAs, NumBs, VectorSize: integer; VAs, VBs: TNNetVolume); var - CntA, CntB, CntAPos, CntBPos, MaxA, MaxB: integer; - DestPointer: pointer; - CntBVectorSizePlusCntBPos: integer; + CntA, CntB, MaxA, MaxB: integer; + {$IFDEF AVXANY} vRes: array[0..3] of Single; localNumElements, MissedElements: integer; + {$ENDIF} + PtrA, PtrB: TNeuralFloatArrPtr; Result: TNeuralFloat; begin @@ -5175,8 +5301,10 @@ procedure TNNetVolume.DotProducts(NumAs, NumBs, VectorSize: integer; VAs, VBs: T //localNumElements := (VectorSize div 4) * 4; //MissedElements := VectorSize - localNumElements; + {$IFDEF AVXANY} MissedElements := VectorSize and 3; localNumElements := VectorSize xor MissedElements; + {$ENDIF} for CntB := 0 to MaxB do begin @@ -5418,11 +5546,12 @@ procedure TNNetVolume.DotProducts(NumAs, NumBs, VectorSize: integer; VAs, VBs: T procedure TNNetVolume.DotProductsTiled(NumAs, NumBs, VectorSize: integer; VAs, VBs: TNNetVolume; TileSizeA, TileSizeB: integer); var - CntA, CntB, CntAPos, CntBPos, MaxA, MaxB: integer; - DestPointer: pointer; - CntBVectorSizePlusCntBPos: integer; + CntA, CntB: integer; + {$IFDEF AVXANY} vRes: array[0..3] of Single; - localNumElements, MissedElements: integer; + localNumElements : integer; + MissedElements: integer; + {$ENDIF} PtrA, PtrB: TNeuralFloatArrPtr; Result: TNeuralFloat; // Tiling @@ -5430,13 +5559,12 @@ procedure TNNetVolume.DotProductsTiled(NumAs, 
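+ // editor's note: in this and the neighbouring dot-product routines the
+ // scratch variables used only by the SIMD code (vRes, localNumElements,
+ // MissedElements) are now wrapped in {$IFDEF AVXANY} so builds without
+ // the AVX path no longer emit "variable declared but never used" hints.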
NumBs, VectorSize: integer; VAs, V
   StartTileA, EndTileA, StartTileB, EndTileB: integer;
   MaxTileA, MaxTileB: integer;
 begin
-  MaxA := NumAs - 1;
-  MaxB := NumBs - 1;
-
   //localNumElements := (VectorSize div 4) * 4;
   //MissedElements := VectorSize - localNumElements;
+  {$IFDEF AVXANY}
   MissedElements := VectorSize and 3;
   localNumElements := VectorSize xor MissedElements;
+  {$ENDIF}
   MaxTileA := (NumAs div TileSizeA) - 1;
   MaxTileB := (NumBs div TileSizeB) - 1;
   for TileBCnt := 0 to MaxTileB do
@@ -5694,13 +5822,14 @@ procedure TNNetVolume.DotProductsTiled(NumAs, NumBs, VectorSize: integer; VAs, V

 procedure TNNetVolume.GroupedDotProductsTiled(Groups, NumAs, NumBs,
   VectorSize: integer; VAs, VBs: TNNetVolume; TileSizeA, TileSizeB: integer);
 var
-  CntA, CntB, CntAPos, CntBPos, MaxA, MaxB: integer;
+  CntA, CntB: integer;
   GroupId, GroupASize: integer;
   VectoreBSize: integer;
+  {$IFDEF AVXANY}
   DestPointer: pointer;
-  CntBVectorSizePlusCntBPos: integer;
   vRes: array[0..3] of Single;
   localNumElements, MissedElements: integer;
+  {$ENDIF}
   PtrA, PtrB: TNeuralFloatArrPtr;
   Result: TNeuralFloat;
   // Tiling
@@ -5708,8 +5837,6 @@ procedure TNNetVolume.GroupedDotProductsTiled(Groups, NumAs, NumBs,
   StartTileA, EndTileA, StartTileB, EndTileB: integer;
   MaxTileA, MaxTileB: integer;
 begin
-  MaxA := NumAs - 1;
-  MaxB := NumBs - 1;
   GroupASize := NumAs div Groups;
   VectoreBSize := VectorSize * Groups;
@@ -5727,8 +5854,10 @@ procedure TNNetVolume.GroupedDotProductsTiled(Groups, NumAs, NumBs,
   //localNumElements := (VectorSize div 4) * 4;
   //MissedElements := VectorSize - localNumElements;
+  {$IFDEF AVXANY}
   MissedElements := VectorSize and 3;
   localNumElements := VectorSize xor MissedElements;
+  {$ENDIF}
   MaxTileA := (NumAs div TileSizeA) - 1;
   MaxTileB := (NumBs div TileSizeB) - 1;
   for TileBCnt := 0 to MaxTileB do
@@ -6000,31 +6129,26 @@ procedure TNNetVolume.AddArea(DestX, DestY, OriginX, OriginY, LenX,
   end;
 end;

+// ###########################################
+// #### local definitions for AVX determination
+
+var locAVX : boolean = False;
+    locAVX2 : boolean = False;
+    locAVX512 : boolean = False;
+
 function TNNetVolume.HasAVX: boolean;
 begin
-  {$IFDEF AVXANY}
-  Result := true;
-  {$ELSE}
-  Result := false;
-  {$ENDIF}
+  Result := locAVX;
 end;

 function TNNetVolume.HasAVX2: boolean;
 begin
-  {$IFDEF AVX2}
-  Result := true;
-  {$ELSE}
-  Result := false;
-  {$ENDIF}
+  Result := locAVX2;
 end;

 function TNNetVolume.HasAVX512: boolean;
 begin
-  {$IFDEF AVX512}
-  Result := true;
-  {$ELSE}
-  Result := false;
-  {$ENDIF}
+  Result := locAVX512;
 end;

 function TNNetVolume.PearsonCorrelation(Y: TNNetVolume): TNeuralFloat;
@@ -9230,9 +9354,25 @@ class function TVolume.DotProduct(PtrA, PtrB: TNeuralFloatArrPtr; NumElements: i
   ): Single;
 var
   I: integer;
+  {$IFDEF FPC}
   BasePos, vHigh: integer;
   AddrA, AddrB: TNeuralFloatPtr;
+  {$ENDIF}
 begin
+  {$IFNDEF FPC}
+  // note: the AVX path is disabled here ("if false"); the commented-out
+  // condition shows the intended runtime check
+  if false //locAVX and (NumElements > 4)
+  then
+      Result := AVXDotProd(PSingle(PtrA), PSingle(PtrB), NumElements)
+  else
+  begin
+       Result := 0;
+       for i := 0 to NumElements - 1 do
+           Result := Result + PtrA^[i]*PtrB^[i];
+  end;
+
+  exit;
+
+  {$ELSE}
   Result := 0;
   BasePos := 0;
   vHigh := NumElements - 1;
@@ -9276,6 +9416,7 @@ class function TVolume.DotProduct(PtrA, PtrB: TNeuralFloatArrPtr; NumElements: i
   end;
   //WriteLn('Hello: ', Result);
   //ReadLn();
+  {$ENDIF}
 end;

 class function TVolume.Product(PtrA: TNeuralFloatArrPtr;
@@ -9323,4 +9464,13 @@ procedure TNNetVolumePairList.SetItem(Index: Integer; AObject: TNNetVolumePair);
 end;
 {$ENDIF}
+
+// ###########################################
+// #### Initialize CPU feature flags
+// ###########################################
+
+initialization
+  locAVX := IsAVXPresent;
+  // FMA support is used as a stand-in for AVX2 here: CPUFeatures does not
+  // export a dedicated IsAVX2Present check
+  locAVX2 := IsFMAPresent;
+  locAVX512 := IsAVX512Present;
 end.
diff --git a/neural/neuralvolumev.pas b/neural/neuralvolumev.pas
index 20341090..b77102d7 100644
--- a/neural/neuralvolumev.pas
+++ b/neural/neuralvolumev.pas
@@ -25,7 +25,7 @@ interface

 uses
   Classes, SysUtils, ExtCtrls, Graphics, neuralvolume,
-  {$IFDEF FPC}LCLType, FPImage {$ELSE}Winapi.Windows{$ENDIF} ;
+  {$IFDEF FPC}LCLType, FPImage {$ELSE}Windows{$ENDIF} ;

 /// saves a bitmap into a file from a handle HWND
 procedure SaveHandleToBitmap(OutputFileName: string; hWnd: HWND);
@@ -47,7 +47,7 @@ procedure LoadImageFromFileIntoVolume(ImageFileName:string; V:TNNetVolume);
 {$ENDIF}

 implementation
-{$IFDEF FPC}uses LCLIntf;{$ENDIF}
+uses {$IFDEF FPC}LCLIntf,{$ENDIF}Math;

 procedure SaveHandleToBitmap(OutputFileName: string; hWnd: HWND);
 {$IFDEF FPC}