5
5
6
6
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__ELF__)
7
7
.text
8
- .globl ChaCha20_ctr32
9
- .hidden ChaCha20_ctr32
10
- .type ChaCha20_ctr32 ,@function
8
+ .globl ChaCha20_ctr32_nohw
9
+ .hidden ChaCha20_ctr32_nohw
10
+ .type ChaCha20_ctr32_nohw ,@function
11
11
.align 16
12
- ChaCha20_ctr32 :
13
- .L_ChaCha20_ctr32_begin :
12
+ ChaCha20_ctr32_nohw :
13
+ .L_ChaCha20_ctr32_nohw_begin :
14
14
pushl %ebp
15
15
pushl %ebx
16
16
pushl %esi
17
17
pushl %edi
18
- xorl %eax ,%eax
19
- cmpl 28 (%esp ),%eax
20
- je .L000no_data
21
- call .Lpic_point
22
- .Lpic_point:
23
- popl %eax
24
- leal OPENSSL_ia32cap_P-.Lpic_point(%eax ),%ebp
25
- testl $16777216 ,(%ebp )
26
- jz .L001x86
27
- testl $512 ,4 (%ebp )
28
- jz .L001x86
29
- jmp .Lssse3_shortcut
30
- .L001x86:
31
18
movl 32 (%esp ),%esi
32
19
movl 36 (%esp ),%edi
33
20
subl $132 ,%esp
@@ -56,13 +43,13 @@ ChaCha20_ctr32:
56
43
movl %ebx ,116 (%esp )
57
44
movl %ecx ,120 (%esp )
58
45
movl %edx ,124 (%esp )
59
- jmp .L002entry
46
+ jmp .L000entry
60
47
.align 16
61
- .L003outer_loop :
48
+ .L001outer_loop :
62
49
movl %ebx ,156 (%esp )
63
50
movl %eax ,152 (%esp )
64
51
movl %ecx ,160 (%esp )
65
- .L002entry :
52
+ .L000entry :
66
53
movl $1634760805 ,%eax
67
54
movl $857760878 ,4 (%esp )
68
55
movl $2036477234 ,8 (%esp )
@@ -90,9 +77,9 @@ ChaCha20_ctr32:
90
77
movl %edi ,60 (%esp )
91
78
movl %edx ,112 (%esp )
92
79
movl $10 ,%ebx
93
- jmp .L004loop
80
+ jmp .L002loop
94
81
.align 16
95
- .L004loop :
82
+ .L002loop :
96
83
addl %ebp ,%eax
97
84
movl %ebx ,128 (%esp )
98
85
movl %ebp ,%ebx
@@ -246,14 +233,14 @@ ChaCha20_ctr32:
246
233
xorl %esi ,%ebp
247
234
roll $7 ,%ebp
248
235
decl %ebx
249
- jnz .L004loop
236
+ jnz .L002loop
250
237
movl 160 (%esp ),%ebx
251
238
addl $1634760805 ,%eax
252
239
addl 80 (%esp ),%ebp
253
240
addl 96 (%esp ),%ecx
254
241
addl 100 (%esp ),%esi
255
242
cmpl $64 ,%ebx
256
- jb .L005tail
243
+ jb .L003tail
257
244
movl 156 (%esp ),%ebx
258
245
addl 112 (%esp ),%edx
259
246
addl 120 (%esp ),%edi
@@ -316,9 +303,9 @@ ChaCha20_ctr32:
316
303
movl %ebp ,(%eax )
317
304
leal 64 (%eax ),%eax
318
305
subl $64 ,%ecx
319
- jnz .L003outer_loop
320
- jmp .L006done
321
- .L005tail :
306
+ jnz .L001outer_loop
307
+ jmp .L004done
308
+ .L003tail :
322
309
addl 112 (%esp ),%edx
323
310
addl 120 (%esp ),%edi
324
311
movl %eax ,(%esp )
@@ -362,34 +349,35 @@ ChaCha20_ctr32:
362
349
movl %edi ,60 (%esp )
363
350
xorl %eax ,%eax
364
351
xorl %edx ,%edx
365
- .L007tail_loop :
352
+ .L005tail_loop :
366
353
movb (%esi ,%ebp ,1 ),%al
367
354
movb (%esp ,%esi ,1 ),%dl
368
355
leal 1 (%esi ),%esi
369
356
xorb %dl ,%al
370
357
movb %al ,-1 (%ecx ,%esi ,1 )
371
358
decl %ebx
372
- jnz .L007tail_loop
373
- .L006done :
359
+ jnz .L005tail_loop
360
+ .L004done :
374
361
addl $132 ,%esp
375
- .L000no_data:
376
362
popl %edi
377
363
popl %esi
378
364
popl %ebx
379
365
popl %ebp
380
366
ret
381
- .size ChaCha20_ctr32 ,.-.L_ChaCha20_ctr32_begin
382
- .globl ChaCha20_ssse3
383
- .hidden ChaCha20_ssse3
384
- .type ChaCha20_ssse3 ,@function
367
+ .size ChaCha20_ctr32_nohw ,.-.L_ChaCha20_ctr32_nohw_begin
368
+ .globl ChaCha20_ctr32_ssse3
369
+ .hidden ChaCha20_ctr32_ssse3
370
+ .type ChaCha20_ctr32_ssse3 ,@function
385
371
.align 16
386
- ChaCha20_ssse3 :
387
- .L_ChaCha20_ssse3_begin :
372
+ ChaCha20_ctr32_ssse3 :
373
+ .L_ChaCha20_ctr32_ssse3_begin :
388
374
pushl %ebp
389
375
pushl %ebx
390
376
pushl %esi
391
377
pushl %edi
392
- .Lssse3_shortcut:
378
+ call .Lpic_point
379
+ .Lpic_point:
380
+ popl %eax
393
381
movl 20 (%esp ),%edi
394
382
movl 24 (%esp ),%esi
395
383
movl 28 (%esp ),%ecx
@@ -402,7 +390,7 @@ ChaCha20_ssse3:
402
390
leal .Lssse3_data-.Lpic_point(%eax ),%eax
403
391
movdqu (%ebx ),%xmm3
404
392
cmpl $256 ,%ecx
405
- jb .L0081x
393
+ jb .L0061x
406
394
movl %edx ,516 (%esp )
407
395
movl %ebx ,520 (%esp )
408
396
subl $256 ,%ecx
@@ -447,9 +435,9 @@ ChaCha20_ssse3:
447
435
movdqa %xmm7 ,-80 (%ebp )
448
436
leal 128 (%esi ),%esi
449
437
leal 128 (%edi ),%edi
450
- jmp .L009outer_loop
438
+ jmp .L007outer_loop
451
439
.align 16
452
- .L009outer_loop :
440
+ .L007outer_loop :
453
441
movdqa -112 (%ebp ),%xmm1
454
442
movdqa -96 (%ebp ),%xmm2
455
443
movdqa -80 (%ebp ),%xmm3
@@ -484,7 +472,7 @@ ChaCha20_ssse3:
484
472
movl $10 ,%edx
485
473
nop
486
474
.align 16
487
- .L010loop :
475
+ .L008loop :
488
476
paddd %xmm3 ,%xmm0
489
477
movdqa %xmm3 ,%xmm2
490
478
pxor %xmm0 ,%xmm6
@@ -684,7 +672,7 @@ ChaCha20_ssse3:
684
672
psrld $25 ,%xmm1
685
673
por %xmm1 ,%xmm3
686
674
decl %edx
687
- jnz .L010loop
675
+ jnz .L008loop
688
676
movdqa %xmm3 ,-64 (%ebx )
689
677
movdqa %xmm4 ,(%ebx )
690
678
movdqa %xmm5 ,16 (%ebx )
@@ -826,9 +814,9 @@ ChaCha20_ssse3:
826
814
movdqu %xmm7 ,64 (%edi )
827
815
leal 208 (%edi ),%edi
828
816
subl $256 ,%ecx
829
- jnc .L009outer_loop
817
+ jnc .L007outer_loop
830
818
addl $256 ,%ecx
831
- jz .L011done
819
+ jz .L009done
832
820
movl 520 (%esp ),%ebx
833
821
leal -128 (%esi ),%esi
834
822
movl 516 (%esp ),%edx
@@ -838,7 +826,7 @@ ChaCha20_ssse3:
838
826
paddd 96 (%eax ),%xmm2
839
827
pand 112 (%eax ),%xmm3
840
828
por %xmm2 ,%xmm3
841
- .L0081x :
829
+ .L0061x :
842
830
movdqa 32 (%eax ),%xmm0
843
831
movdqu (%edx ),%xmm1
844
832
movdqu 16 (%edx ),%xmm2
@@ -850,19 +838,19 @@ ChaCha20_ssse3:
850
838
movdqa %xmm2 ,32 (%esp )
851
839
movdqa %xmm3 ,48 (%esp )
852
840
movl $10 ,%edx
853
- jmp .L012loop1x
841
+ jmp .L010loop1x
854
842
.align 16
855
- .L013outer1x :
843
+ .L011outer1x :
856
844
movdqa 80 (%eax ),%xmm3
857
845
movdqa (%esp ),%xmm0
858
846
movdqa 16 (%esp ),%xmm1
859
847
movdqa 32 (%esp ),%xmm2
860
848
paddd 48 (%esp ),%xmm3
861
849
movl $10 ,%edx
862
850
movdqa %xmm3 ,48 (%esp )
863
- jmp .L012loop1x
851
+ jmp .L010loop1x
864
852
.align 16
865
- .L012loop1x :
853
+ .L010loop1x :
866
854
paddd %xmm1 ,%xmm0
867
855
pxor %xmm0 ,%xmm3
868
856
.byte 102 ,15 ,56 ,0 ,222
@@ -907,13 +895,13 @@ ChaCha20_ssse3:
907
895
pshufd $147 ,%xmm1 ,%xmm1
908
896
pshufd $57 ,%xmm3 ,%xmm3
909
897
decl %edx
910
- jnz .L012loop1x
898
+ jnz .L010loop1x
911
899
paddd (%esp ),%xmm0
912
900
paddd 16 (%esp ),%xmm1
913
901
paddd 32 (%esp ),%xmm2
914
902
paddd 48 (%esp ),%xmm3
915
903
cmpl $64 ,%ecx
916
- jb .L014tail
904
+ jb .L012tail
917
905
movdqu (%esi ),%xmm4
918
906
movdqu 16 (%esi ),%xmm5
919
907
pxor %xmm4 ,%xmm0
@@ -929,32 +917,32 @@ ChaCha20_ssse3:
929
917
movdqu %xmm3 ,48 (%edi )
930
918
leal 64 (%edi ),%edi
931
919
subl $64 ,%ecx
932
- jnz .L013outer1x
933
- jmp .L011done
934
- .L014tail :
920
+ jnz .L011outer1x
921
+ jmp .L009done
922
+ .L012tail :
935
923
movdqa %xmm0 ,(%esp )
936
924
movdqa %xmm1 ,16 (%esp )
937
925
movdqa %xmm2 ,32 (%esp )
938
926
movdqa %xmm3 ,48 (%esp )
939
927
xorl %eax ,%eax
940
928
xorl %edx ,%edx
941
929
xorl %ebp ,%ebp
942
- .L015tail_loop :
930
+ .L013tail_loop :
943
931
movb (%esp ,%ebp ,1 ),%al
944
932
movb (%esi ,%ebp ,1 ),%dl
945
933
leal 1 (%ebp ),%ebp
946
934
xorb %dl ,%al
947
935
movb %al ,-1 (%edi ,%ebp ,1 )
948
936
decl %ecx
949
- jnz .L015tail_loop
950
- .L011done :
937
+ jnz .L013tail_loop
938
+ .L009done :
951
939
movl 512 (%esp ),%esp
952
940
popl %edi
953
941
popl %esi
954
942
popl %ebx
955
943
popl %ebp
956
944
ret
957
- .size ChaCha20_ssse3 ,.-.L_ChaCha20_ssse3_begin
945
+ .size ChaCha20_ctr32_ssse3 ,.-.L_ChaCha20_ctr32_ssse3_begin
958
946
.align 64
959
947
.Lssse3_data:
960
948
.byte 2 ,3 ,0 ,1 ,6 ,7 ,4 ,5 ,10 ,11 ,8 ,9 ,14 ,15 ,12 ,13
0 commit comments