diff --git a/generated-src/linux-x86/crypto/chacha/chacha-x86.S b/generated-src/linux-x86/crypto/chacha/chacha-x86.S index a3dc02e0999..566fbb4cd61 100644 --- a/generated-src/linux-x86/crypto/chacha/chacha-x86.S +++ b/generated-src/linux-x86/crypto/chacha/chacha-x86.S @@ -5,29 +5,16 @@ #if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__ELF__) .text -.globl ChaCha20_ctr32 -.hidden ChaCha20_ctr32 -.type ChaCha20_ctr32,@function +.globl ChaCha20_ctr32_nohw +.hidden ChaCha20_ctr32_nohw +.type ChaCha20_ctr32_nohw,@function .align 16 -ChaCha20_ctr32: -.L_ChaCha20_ctr32_begin: +ChaCha20_ctr32_nohw: +.L_ChaCha20_ctr32_nohw_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi - xorl %eax,%eax - cmpl 28(%esp),%eax - je .L000no_data - call .Lpic_point -.Lpic_point: - popl %eax - leal OPENSSL_ia32cap_P-.Lpic_point(%eax),%ebp - testl $16777216,(%ebp) - jz .L001x86 - testl $512,4(%ebp) - jz .L001x86 - jmp .Lssse3_shortcut -.L001x86: movl 32(%esp),%esi movl 36(%esp),%edi subl $132,%esp @@ -56,13 +43,13 @@ ChaCha20_ctr32: movl %ebx,116(%esp) movl %ecx,120(%esp) movl %edx,124(%esp) - jmp .L002entry + jmp .L000entry .align 16 -.L003outer_loop: +.L001outer_loop: movl %ebx,156(%esp) movl %eax,152(%esp) movl %ecx,160(%esp) -.L002entry: +.L000entry: movl $1634760805,%eax movl $857760878,4(%esp) movl $2036477234,8(%esp) @@ -90,9 +77,9 @@ ChaCha20_ctr32: movl %edi,60(%esp) movl %edx,112(%esp) movl $10,%ebx - jmp .L004loop + jmp .L002loop .align 16 -.L004loop: +.L002loop: addl %ebp,%eax movl %ebx,128(%esp) movl %ebp,%ebx @@ -246,14 +233,14 @@ ChaCha20_ctr32: xorl %esi,%ebp roll $7,%ebp decl %ebx - jnz .L004loop + jnz .L002loop movl 160(%esp),%ebx addl $1634760805,%eax addl 80(%esp),%ebp addl 96(%esp),%ecx addl 100(%esp),%esi cmpl $64,%ebx - jb .L005tail + jb .L003tail movl 156(%esp),%ebx addl 112(%esp),%edx addl 120(%esp),%edi @@ -316,9 +303,9 @@ ChaCha20_ctr32: movl %ebp,(%eax) leal 64(%eax),%eax subl $64,%ecx - jnz .L003outer_loop - jmp .L006done -.L005tail: + jnz .L001outer_loop + jmp .L004done +.L003tail: addl 112(%esp),%edx addl 120(%esp),%edi movl %eax,(%esp) @@ -362,34 +349,35 @@ ChaCha20_ctr32: movl %edi,60(%esp) xorl %eax,%eax xorl %edx,%edx -.L007tail_loop: +.L005tail_loop: movb (%esi,%ebp,1),%al movb (%esp,%esi,1),%dl leal 1(%esi),%esi xorb %dl,%al movb %al,-1(%ecx,%esi,1) decl %ebx - jnz .L007tail_loop -.L006done: + jnz .L005tail_loop +.L004done: addl $132,%esp -.L000no_data: popl %edi popl %esi popl %ebx popl %ebp ret -.size ChaCha20_ctr32,.-.L_ChaCha20_ctr32_begin -.globl ChaCha20_ssse3 -.hidden ChaCha20_ssse3 -.type ChaCha20_ssse3,@function +.size ChaCha20_ctr32_nohw,.-.L_ChaCha20_ctr32_nohw_begin +.globl ChaCha20_ctr32_ssse3 +.hidden ChaCha20_ctr32_ssse3 +.type ChaCha20_ctr32_ssse3,@function .align 16 -ChaCha20_ssse3: -.L_ChaCha20_ssse3_begin: +ChaCha20_ctr32_ssse3: +.L_ChaCha20_ctr32_ssse3_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi -.Lssse3_shortcut: + call .Lpic_point +.Lpic_point: + popl %eax movl 20(%esp),%edi movl 24(%esp),%esi movl 28(%esp),%ecx @@ -402,7 +390,7 @@ ChaCha20_ssse3: leal .Lssse3_data-.Lpic_point(%eax),%eax movdqu (%ebx),%xmm3 cmpl $256,%ecx - jb .L0081x + jb .L0061x movl %edx,516(%esp) movl %ebx,520(%esp) subl $256,%ecx @@ -447,9 +435,9 @@ ChaCha20_ssse3: movdqa %xmm7,-80(%ebp) leal 128(%esi),%esi leal 128(%edi),%edi - jmp .L009outer_loop + jmp .L007outer_loop .align 16 -.L009outer_loop: +.L007outer_loop: movdqa -112(%ebp),%xmm1 movdqa -96(%ebp),%xmm2 movdqa -80(%ebp),%xmm3 @@ -484,7 +472,7 @@ ChaCha20_ssse3: movl $10,%edx nop .align 16 -.L010loop: +.L008loop: paddd %xmm3,%xmm0 movdqa %xmm3,%xmm2 pxor %xmm0,%xmm6 @@ -684,7 +672,7 @@ ChaCha20_ssse3: psrld $25,%xmm1 por %xmm1,%xmm3 decl %edx - jnz .L010loop + jnz .L008loop movdqa %xmm3,-64(%ebx) movdqa %xmm4,(%ebx) movdqa %xmm5,16(%ebx) @@ -826,9 +814,9 @@ ChaCha20_ssse3: movdqu %xmm7,64(%edi) leal 208(%edi),%edi subl $256,%ecx - jnc .L009outer_loop + jnc .L007outer_loop addl $256,%ecx - jz .L011done + jz .L009done movl 520(%esp),%ebx leal -128(%esi),%esi movl 516(%esp),%edx @@ -838,7 +826,7 @@ ChaCha20_ssse3: paddd 96(%eax),%xmm2 pand 112(%eax),%xmm3 por %xmm2,%xmm3 -.L0081x: +.L0061x: movdqa 32(%eax),%xmm0 movdqu (%edx),%xmm1 movdqu 16(%edx),%xmm2 @@ -850,9 +838,9 @@ ChaCha20_ssse3: movdqa %xmm2,32(%esp) movdqa %xmm3,48(%esp) movl $10,%edx - jmp .L012loop1x + jmp .L010loop1x .align 16 -.L013outer1x: +.L011outer1x: movdqa 80(%eax),%xmm3 movdqa (%esp),%xmm0 movdqa 16(%esp),%xmm1 @@ -860,9 +848,9 @@ ChaCha20_ssse3: paddd 48(%esp),%xmm3 movl $10,%edx movdqa %xmm3,48(%esp) - jmp .L012loop1x + jmp .L010loop1x .align 16 -.L012loop1x: +.L010loop1x: paddd %xmm1,%xmm0 pxor %xmm0,%xmm3 .byte 102,15,56,0,222 @@ -907,13 +895,13 @@ ChaCha20_ssse3: pshufd $147,%xmm1,%xmm1 pshufd $57,%xmm3,%xmm3 decl %edx - jnz .L012loop1x + jnz .L010loop1x paddd (%esp),%xmm0 paddd 16(%esp),%xmm1 paddd 32(%esp),%xmm2 paddd 48(%esp),%xmm3 cmpl $64,%ecx - jb .L014tail + jb .L012tail movdqu (%esi),%xmm4 movdqu 16(%esi),%xmm5 pxor %xmm4,%xmm0 @@ -929,9 +917,9 @@ ChaCha20_ssse3: movdqu %xmm3,48(%edi) leal 64(%edi),%edi subl $64,%ecx - jnz .L013outer1x - jmp .L011done -.L014tail: + jnz .L011outer1x + jmp .L009done +.L012tail: movdqa %xmm0,(%esp) movdqa %xmm1,16(%esp) movdqa %xmm2,32(%esp) @@ -939,22 +927,22 @@ ChaCha20_ssse3: xorl %eax,%eax xorl %edx,%edx xorl %ebp,%ebp -.L015tail_loop: +.L013tail_loop: movb (%esp,%ebp,1),%al movb (%esi,%ebp,1),%dl leal 1(%ebp),%ebp xorb %dl,%al movb %al,-1(%edi,%ebp,1) decl %ecx - jnz .L015tail_loop -.L011done: + jnz .L013tail_loop +.L009done: movl 512(%esp),%esp popl %edi popl %esi popl %ebx popl %ebp ret -.size ChaCha20_ssse3,.-.L_ChaCha20_ssse3_begin +.size ChaCha20_ctr32_ssse3,.-.L_ChaCha20_ctr32_ssse3_begin .align 64 .Lssse3_data: .byte 2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13 diff --git a/generated-src/mac-x86/crypto/chacha/chacha-x86.S b/generated-src/mac-x86/crypto/chacha/chacha-x86.S index baa06acf807..48293da9185 100644 --- a/generated-src/mac-x86/crypto/chacha/chacha-x86.S +++ b/generated-src/mac-x86/crypto/chacha/chacha-x86.S @@ -5,28 +5,15 @@ #if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__APPLE__) .text -.globl _ChaCha20_ctr32 -.private_extern _ChaCha20_ctr32 +.globl _ChaCha20_ctr32_nohw +.private_extern _ChaCha20_ctr32_nohw .align 4 -_ChaCha20_ctr32: -L_ChaCha20_ctr32_begin: +_ChaCha20_ctr32_nohw: +L_ChaCha20_ctr32_nohw_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi - xorl %eax,%eax - cmpl 28(%esp),%eax - je L000no_data - call Lpic_point -Lpic_point: - popl %eax - movl L_OPENSSL_ia32cap_P$non_lazy_ptr-Lpic_point(%eax),%ebp - testl $16777216,(%ebp) - jz L001x86 - testl $512,4(%ebp) - jz L001x86 - jmp Lssse3_shortcut -L001x86: movl 32(%esp),%esi movl 36(%esp),%edi subl $132,%esp @@ -55,13 +42,13 @@ L001x86: movl %ebx,116(%esp) movl %ecx,120(%esp) movl %edx,124(%esp) - jmp L002entry + jmp L000entry .align 4,0x90 -L003outer_loop: +L001outer_loop: movl %ebx,156(%esp) movl %eax,152(%esp) movl %ecx,160(%esp) -L002entry: +L000entry: movl $1634760805,%eax movl $857760878,4(%esp) movl $2036477234,8(%esp) @@ -89,9 +76,9 @@ L002entry: movl %edi,60(%esp) movl %edx,112(%esp) movl $10,%ebx - jmp L004loop + jmp L002loop .align 4,0x90 -L004loop: +L002loop: addl %ebp,%eax movl %ebx,128(%esp) movl %ebp,%ebx @@ -245,14 +232,14 @@ L004loop: xorl %esi,%ebp roll $7,%ebp decl %ebx - jnz L004loop + jnz L002loop movl 160(%esp),%ebx addl $1634760805,%eax addl 80(%esp),%ebp addl 96(%esp),%ecx addl 100(%esp),%esi cmpl $64,%ebx - jb L005tail + jb L003tail movl 156(%esp),%ebx addl 112(%esp),%edx addl 120(%esp),%edi @@ -315,9 +302,9 @@ L004loop: movl %ebp,(%eax) leal 64(%eax),%eax subl $64,%ecx - jnz L003outer_loop - jmp L006done -L005tail: + jnz L001outer_loop + jmp L004done +L003tail: addl 112(%esp),%edx addl 120(%esp),%edi movl %eax,(%esp) @@ -361,32 +348,33 @@ L005tail: movl %edi,60(%esp) xorl %eax,%eax xorl %edx,%edx -L007tail_loop: +L005tail_loop: movb (%esi,%ebp,1),%al movb (%esp,%esi,1),%dl leal 1(%esi),%esi xorb %dl,%al movb %al,-1(%ecx,%esi,1) decl %ebx - jnz L007tail_loop -L006done: + jnz L005tail_loop +L004done: addl $132,%esp -L000no_data: popl %edi popl %esi popl %ebx popl %ebp ret -.globl _ChaCha20_ssse3 -.private_extern _ChaCha20_ssse3 +.globl _ChaCha20_ctr32_ssse3 +.private_extern _ChaCha20_ctr32_ssse3 .align 4 -_ChaCha20_ssse3: -L_ChaCha20_ssse3_begin: +_ChaCha20_ctr32_ssse3: +L_ChaCha20_ctr32_ssse3_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi -Lssse3_shortcut: + call Lpic_point +Lpic_point: + popl %eax movl 20(%esp),%edi movl 24(%esp),%esi movl 28(%esp),%ecx @@ -399,7 +387,7 @@ Lssse3_shortcut: leal Lssse3_data-Lpic_point(%eax),%eax movdqu (%ebx),%xmm3 cmpl $256,%ecx - jb L0081x + jb L0061x movl %edx,516(%esp) movl %ebx,520(%esp) subl $256,%ecx @@ -444,9 +432,9 @@ Lssse3_shortcut: movdqa %xmm7,-80(%ebp) leal 128(%esi),%esi leal 128(%edi),%edi - jmp L009outer_loop + jmp L007outer_loop .align 4,0x90 -L009outer_loop: +L007outer_loop: movdqa -112(%ebp),%xmm1 movdqa -96(%ebp),%xmm2 movdqa -80(%ebp),%xmm3 @@ -481,7 +469,7 @@ L009outer_loop: movl $10,%edx nop .align 4,0x90 -L010loop: +L008loop: paddd %xmm3,%xmm0 movdqa %xmm3,%xmm2 pxor %xmm0,%xmm6 @@ -681,7 +669,7 @@ L010loop: psrld $25,%xmm1 por %xmm1,%xmm3 decl %edx - jnz L010loop + jnz L008loop movdqa %xmm3,-64(%ebx) movdqa %xmm4,(%ebx) movdqa %xmm5,16(%ebx) @@ -823,9 +811,9 @@ L010loop: movdqu %xmm7,64(%edi) leal 208(%edi),%edi subl $256,%ecx - jnc L009outer_loop + jnc L007outer_loop addl $256,%ecx - jz L011done + jz L009done movl 520(%esp),%ebx leal -128(%esi),%esi movl 516(%esp),%edx @@ -835,7 +823,7 @@ L010loop: paddd 96(%eax),%xmm2 pand 112(%eax),%xmm3 por %xmm2,%xmm3 -L0081x: +L0061x: movdqa 32(%eax),%xmm0 movdqu (%edx),%xmm1 movdqu 16(%edx),%xmm2 @@ -847,9 +835,9 @@ L0081x: movdqa %xmm2,32(%esp) movdqa %xmm3,48(%esp) movl $10,%edx - jmp L012loop1x + jmp L010loop1x .align 4,0x90 -L013outer1x: +L011outer1x: movdqa 80(%eax),%xmm3 movdqa (%esp),%xmm0 movdqa 16(%esp),%xmm1 @@ -857,9 +845,9 @@ L013outer1x: paddd 48(%esp),%xmm3 movl $10,%edx movdqa %xmm3,48(%esp) - jmp L012loop1x + jmp L010loop1x .align 4,0x90 -L012loop1x: +L010loop1x: paddd %xmm1,%xmm0 pxor %xmm0,%xmm3 .byte 102,15,56,0,222 @@ -904,13 +892,13 @@ L012loop1x: pshufd $147,%xmm1,%xmm1 pshufd $57,%xmm3,%xmm3 decl %edx - jnz L012loop1x + jnz L010loop1x paddd (%esp),%xmm0 paddd 16(%esp),%xmm1 paddd 32(%esp),%xmm2 paddd 48(%esp),%xmm3 cmpl $64,%ecx - jb L014tail + jb L012tail movdqu (%esi),%xmm4 movdqu 16(%esi),%xmm5 pxor %xmm4,%xmm0 @@ -926,9 +914,9 @@ L012loop1x: movdqu %xmm3,48(%edi) leal 64(%edi),%edi subl $64,%ecx - jnz L013outer1x - jmp L011done -L014tail: + jnz L011outer1x + jmp L009done +L012tail: movdqa %xmm0,(%esp) movdqa %xmm1,16(%esp) movdqa %xmm2,32(%esp) @@ -936,15 +924,15 @@ L014tail: xorl %eax,%eax xorl %edx,%edx xorl %ebp,%ebp -L015tail_loop: +L013tail_loop: movb (%esp,%ebp,1),%al movb (%esi,%ebp,1),%dl leal 1(%ebp),%ebp xorb %dl,%al movb %al,-1(%edi,%ebp,1) decl %ecx - jnz L015tail_loop -L011done: + jnz L013tail_loop +L009done: movl 512(%esp),%esp popl %edi popl %esi @@ -966,8 +954,4 @@ Lssse3_data: .byte 44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32 .byte 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111 .byte 114,103,62,0 -.section __IMPORT,__pointers,non_lazy_symbol_pointers -L_OPENSSL_ia32cap_P$non_lazy_ptr: -.indirect_symbol _OPENSSL_ia32cap_P -.long 0 #endif // !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__APPLE__) diff --git a/generated-src/win-x86/crypto/chacha/chacha-x86.asm b/generated-src/win-x86/crypto/chacha/chacha-x86.asm index e99ad2915ea..880348ca49d 100644 --- a/generated-src/win-x86/crypto/chacha/chacha-x86.asm +++ b/generated-src/win-x86/crypto/chacha/chacha-x86.asm @@ -11,27 +11,14 @@ section .text code align=64 %else section .text code %endif -global _ChaCha20_ctr32 +global _ChaCha20_ctr32_nohw align 16 -_ChaCha20_ctr32: -L$_ChaCha20_ctr32_begin: +_ChaCha20_ctr32_nohw: +L$_ChaCha20_ctr32_nohw_begin: push ebp push ebx push esi push edi - xor eax,eax - cmp eax,DWORD [28+esp] - je NEAR L$000no_data - call L$pic_point -L$pic_point: - pop eax - lea ebp,[_OPENSSL_ia32cap_P] - test DWORD [ebp],16777216 - jz NEAR L$001x86 - test DWORD [4+ebp],512 - jz NEAR L$001x86 - jmp NEAR L$ssse3_shortcut -L$001x86: mov esi,DWORD [32+esp] mov edi,DWORD [36+esp] sub esp,132 @@ -60,13 +47,13 @@ L$001x86: mov DWORD [116+esp],ebx mov DWORD [120+esp],ecx mov DWORD [124+esp],edx - jmp NEAR L$002entry + jmp NEAR L$000entry align 16 -L$003outer_loop: +L$001outer_loop: mov DWORD [156+esp],ebx mov DWORD [152+esp],eax mov DWORD [160+esp],ecx -L$002entry: +L$000entry: mov eax,1634760805 mov DWORD [4+esp],857760878 mov DWORD [8+esp],2036477234 @@ -94,9 +81,9 @@ L$002entry: mov DWORD [60+esp],edi mov DWORD [112+esp],edx mov ebx,10 - jmp NEAR L$004loop + jmp NEAR L$002loop align 16 -L$004loop: +L$002loop: add eax,ebp mov DWORD [128+esp],ebx mov ebx,ebp @@ -250,14 +237,14 @@ L$004loop: xor ebp,esi rol ebp,7 dec ebx - jnz NEAR L$004loop + jnz NEAR L$002loop mov ebx,DWORD [160+esp] add eax,1634760805 add ebp,DWORD [80+esp] add ecx,DWORD [96+esp] add esi,DWORD [100+esp] cmp ebx,64 - jb NEAR L$005tail + jb NEAR L$003tail mov ebx,DWORD [156+esp] add edx,DWORD [112+esp] add edi,DWORD [120+esp] @@ -320,9 +307,9 @@ L$004loop: mov DWORD [eax],ebp lea eax,[64+eax] sub ecx,64 - jnz NEAR L$003outer_loop - jmp NEAR L$006done -L$005tail: + jnz NEAR L$001outer_loop + jmp NEAR L$004done +L$003tail: add edx,DWORD [112+esp] add edi,DWORD [120+esp] mov DWORD [esp],eax @@ -366,31 +353,32 @@ L$005tail: mov DWORD [60+esp],edi xor eax,eax xor edx,edx -L$007tail_loop: +L$005tail_loop: mov al,BYTE [ebp*1+esi] mov dl,BYTE [esi*1+esp] lea esi,[1+esi] xor al,dl mov BYTE [esi*1+ecx-1],al dec ebx - jnz NEAR L$007tail_loop -L$006done: + jnz NEAR L$005tail_loop +L$004done: add esp,132 -L$000no_data: pop edi pop esi pop ebx pop ebp ret -global _ChaCha20_ssse3 +global _ChaCha20_ctr32_ssse3 align 16 -_ChaCha20_ssse3: -L$_ChaCha20_ssse3_begin: +_ChaCha20_ctr32_ssse3: +L$_ChaCha20_ctr32_ssse3_begin: push ebp push ebx push esi push edi -L$ssse3_shortcut: + call L$pic_point +L$pic_point: + pop eax mov edi,DWORD [20+esp] mov esi,DWORD [24+esp] mov ecx,DWORD [28+esp] @@ -403,7 +391,7 @@ L$ssse3_shortcut: lea eax,[(L$ssse3_data-L$pic_point)+eax] movdqu xmm3,[ebx] cmp ecx,256 - jb NEAR L$0081x + jb NEAR L$0061x mov DWORD [516+esp],edx mov DWORD [520+esp],ebx sub ecx,256 @@ -448,9 +436,9 @@ L$ssse3_shortcut: movdqa [ebp-80],xmm7 lea esi,[128+esi] lea edi,[128+edi] - jmp NEAR L$009outer_loop + jmp NEAR L$007outer_loop align 16 -L$009outer_loop: +L$007outer_loop: movdqa xmm1,[ebp-112] movdqa xmm2,[ebp-96] movdqa xmm3,[ebp-80] @@ -485,7 +473,7 @@ L$009outer_loop: mov edx,10 nop align 16 -L$010loop: +L$008loop: paddd xmm0,xmm3 movdqa xmm2,xmm3 pxor xmm6,xmm0 @@ -685,7 +673,7 @@ L$010loop: psrld xmm1,25 por xmm3,xmm1 dec edx - jnz NEAR L$010loop + jnz NEAR L$008loop movdqa [ebx-64],xmm3 movdqa [ebx],xmm4 movdqa [16+ebx],xmm5 @@ -827,9 +815,9 @@ L$010loop: movdqu [64+edi],xmm7 lea edi,[208+edi] sub ecx,256 - jnc NEAR L$009outer_loop + jnc NEAR L$007outer_loop add ecx,256 - jz NEAR L$011done + jz NEAR L$009done mov ebx,DWORD [520+esp] lea esi,[esi-128] mov edx,DWORD [516+esp] @@ -839,7 +827,7 @@ L$010loop: paddd xmm2,[96+eax] pand xmm3,[112+eax] por xmm3,xmm2 -L$0081x: +L$0061x: movdqa xmm0,[32+eax] movdqu xmm1,[edx] movdqu xmm2,[16+edx] @@ -851,9 +839,9 @@ L$0081x: movdqa [32+esp],xmm2 movdqa [48+esp],xmm3 mov edx,10 - jmp NEAR L$012loop1x + jmp NEAR L$010loop1x align 16 -L$013outer1x: +L$011outer1x: movdqa xmm3,[80+eax] movdqa xmm0,[esp] movdqa xmm1,[16+esp] @@ -861,9 +849,9 @@ L$013outer1x: paddd xmm3,[48+esp] mov edx,10 movdqa [48+esp],xmm3 - jmp NEAR L$012loop1x + jmp NEAR L$010loop1x align 16 -L$012loop1x: +L$010loop1x: paddd xmm0,xmm1 pxor xmm3,xmm0 db 102,15,56,0,222 @@ -908,13 +896,13 @@ db 102,15,56,0,223 pshufd xmm1,xmm1,147 pshufd xmm3,xmm3,57 dec edx - jnz NEAR L$012loop1x + jnz NEAR L$010loop1x paddd xmm0,[esp] paddd xmm1,[16+esp] paddd xmm2,[32+esp] paddd xmm3,[48+esp] cmp ecx,64 - jb NEAR L$014tail + jb NEAR L$012tail movdqu xmm4,[esi] movdqu xmm5,[16+esi] pxor xmm0,xmm4 @@ -930,9 +918,9 @@ db 102,15,56,0,223 movdqu [48+edi],xmm3 lea edi,[64+edi] sub ecx,64 - jnz NEAR L$013outer1x - jmp NEAR L$011done -L$014tail: + jnz NEAR L$011outer1x + jmp NEAR L$009done +L$012tail: movdqa [esp],xmm0 movdqa [16+esp],xmm1 movdqa [32+esp],xmm2 @@ -940,15 +928,15 @@ L$014tail: xor eax,eax xor edx,edx xor ebp,ebp -L$015tail_loop: +L$013tail_loop: mov al,BYTE [ebp*1+esp] mov dl,BYTE [ebp*1+esi] lea ebp,[1+ebp] xor al,dl mov BYTE [ebp*1+edi-1],al dec ecx - jnz NEAR L$015tail_loop -L$011done: + jnz NEAR L$013tail_loop +L$009done: mov esp,DWORD [512+esp] pop edi pop esi @@ -970,8 +958,6 @@ db 67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54 db 44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32 db 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111 db 114,103,62,0 -segment .bss -common _OPENSSL_ia32cap_P 16 %else ; Work around https://bugzilla.nasm.us/show_bug.cgi?id=3392738 ret