openssl: more asm
This commit is contained in:
parent
85c19175ed
commit
04b92632bd
12
deps/openssl/asm/Makefile
vendored
12
deps/openssl/asm/Makefile
vendored
@ -26,6 +26,8 @@ OUTPUTS = \
|
||||
x64-elf-gas/aes/aesni-sha1-x86_64.s \
|
||||
x64-elf-gas/bn/modexp512-x86_64.s \
|
||||
x64-elf-gas/bn/x86_64-mont.s \
|
||||
x64-elf-gas/bn/x86_64-mont5.s \
|
||||
x64-elf-gas/bn/x86_64-gf2m.s \
|
||||
x64-elf-gas/camellia/cmll-x86_64.s \
|
||||
x64-elf-gas/md5/md5-x86_64.s \
|
||||
x64-elf-gas/rc4/rc4-x86_64.s \
|
||||
@ -58,6 +60,8 @@ OUTPUTS = \
|
||||
x64-macosx-gas/aes/aesni-sha1-x86_64.s \
|
||||
x64-macosx-gas/bn/modexp512-x86_64.s \
|
||||
x64-macosx-gas/bn/x86_64-mont.s \
|
||||
x64-macosx-gas/bn/x86_64-mont5.s \
|
||||
x64-macosx-gas/bn/x86_64-gf2m.s \
|
||||
x64-macosx-gas/camellia/cmll-x86_64.s \
|
||||
x64-macosx-gas/md5/md5-x86_64.s \
|
||||
x64-macosx-gas/rc4/rc4-x86_64.s \
|
||||
@ -90,6 +94,8 @@ OUTPUTS = \
|
||||
x64-win32-masm/aes/aesni-sha1-x86_64.asm \
|
||||
x64-win32-masm/bn/modexp512-x86_64.asm \
|
||||
x64-win32-masm/bn/x86_64-mont.asm \
|
||||
x64-win32-masm/bn/x86_64-mont5.asm \
|
||||
x64-win32-masm/bn/x86_64-gf2m.asm \
|
||||
x64-win32-masm/camellia/cmll-x86_64.asm \
|
||||
x64-win32-masm/md5/md5-x86_64.asm \
|
||||
x64-win32-masm/rc4/rc4-x86_64.asm \
|
||||
@ -129,6 +135,8 @@ x64-elf-gas/aes/aesni-x86_64.s: ../openssl/crypto/aes/asm/aesni-x86_64.pl
|
||||
x64-elf-gas/aes/aesni-sha1-x86_64.s: ../openssl/crypto/aes/asm/aesni-sha1-x86_64.pl
|
||||
x64-elf-gas/bn/modexp512-x86_64.s: ../openssl/crypto/bn/asm/modexp512-x86_64.pl
|
||||
x64-elf-gas/bn/x86_64-mont.s: ../openssl/crypto/bn/asm/x86_64-mont.pl
|
||||
x64-elf-gas/bn/x86_64-mont5.s: ../openssl/crypto/bn/asm/x86_64-mont5.pl
|
||||
x64-elf-gas/bn/x86_64-gf2m.s: ../openssl/crypto/bn/asm/x86_64-gf2m.pl
|
||||
x64-elf-gas/camellia/cmll-x86_64.s: ../openssl/crypto/camellia/asm/cmll-x86_64.pl
|
||||
x64-elf-gas/md5/md5-x86_64.s: ../openssl/crypto/md5/asm/md5-x86_64.pl
|
||||
x64-elf-gas/rc4/rc4-x86_64.s: ../openssl/crypto/rc4/asm/rc4-x86_64.pl
|
||||
@ -143,6 +151,8 @@ x64-macosx-gas/aes/aesni-x86_64.s: ../openssl/crypto/aes/asm/aesni-x86_64.pl
|
||||
x64-macosx-gas/aes/aesni-sha1-x86_64.s: ../openssl/crypto/aes/asm/aesni-sha1-x86_64.pl
|
||||
x64-macosx-gas/bn/modexp512-x86_64.s: ../openssl/crypto/bn/asm/modexp512-x86_64.pl
|
||||
x64-macosx-gas/bn/x86_64-mont.s: ../openssl/crypto/bn/asm/x86_64-mont.pl
|
||||
x64-macosx-gas/bn/x86_64-mont5.s: ../openssl/crypto/bn/asm/x86_64-mont5.pl
|
||||
x64-macosx-gas/bn/x86_64-gf2m.s: ../openssl/crypto/bn/asm/x86_64-gf2m.pl
|
||||
x64-macosx-gas/camellia/cmll-x86_64.s: ../openssl/crypto/camellia/asm/cmll-x86_64.pl
|
||||
x64-macosx-gas/md5/md5-x86_64.s: ../openssl/crypto/md5/asm/md5-x86_64.pl
|
||||
x64-macosx-gas/rc4/rc4-x86_64.s: ../openssl/crypto/rc4/asm/rc4-x86_64.pl
|
||||
@ -157,6 +167,8 @@ x64-win32-masm/aes/aesni-x86_64.asm: ../openssl/crypto/aes/asm/aesni-x86_64.pl
|
||||
x64-win32-masm/aes/aesni-sha1-x86_64.asm: ../openssl/crypto/aes/asm/aesni-sha1-x86_64.pl
|
||||
x64-win32-masm/bn/modexp512-x86_64.asm: ../openssl/crypto/bn/asm/modexp512-x86_64.pl
|
||||
x64-win32-masm/bn/x86_64-mont.asm: ../openssl/crypto/bn/asm/x86_64-mont.pl
|
||||
x64-win32-masm/bn/x86_64-mont5.asm: ../openssl/crypto/bn/asm/x86_64-mont5.pl
|
||||
x64-win32-masm/bn/x86_64-gf2m.asm: ../openssl/crypto/bn/asm/x86_64-gf2m.pl
|
||||
x64-win32-masm/camellia/cmll-x86_64.asm: ../openssl/crypto/camellia/asm/cmll-x86_64.pl
|
||||
x64-win32-masm/md5/md5-x86_64.asm: ../openssl/crypto/md5/asm/md5-x86_64.pl
|
||||
x64-win32-masm/rc4/rc4-x86_64.asm: ../openssl/crypto/rc4/asm/rc4-x86_64.pl
|
||||
|
295
deps/openssl/asm/x64-elf-gas/bn/x86_64-gf2m.s
vendored
Normal file
295
deps/openssl/asm/x64-elf-gas/bn/x86_64-gf2m.s
vendored
Normal file
@ -0,0 +1,295 @@
|
||||
.text
|
||||
|
||||
|
||||
# _mul_1x1: 64x64 -> 128-bit carry-less (XOR-accumulated) multiply helper.
# In:    %rax = a, %rbp = b, %r8 = nibble mask (the caller in this file passes 15)
# Out:   %rax = low 64 bits of the product, %rdx = high 64 bits
# Stack: reserves 128+8 bytes and builds a 16-entry qword table at 0..120(%rsp)
# Writes (not preserved here): %rbx, %rcx, %rsi, %rdi, %rbp, %r9-%r14, %xmm0, %xmm1
.type _mul_1x1,@function
|
||||
.align 16
|
||||
_mul_1x1:
|
||||
subq $128+8,%rsp
|
||||
# %r9 = a with its top three bits cleared; %r10 = 2*%r9, %r11 = 4*%r9, %r12 = 8*a.
# The top three bits of a are handled separately: sarq $63 turns bits 63/62/61
# of a into all-ones/zero masks that are ANDed with b and shifted into place,
# seeding the %rax (low) / %rdx (high) accumulators.
movq $-1,%r9
|
||||
leaq (%rax,%rax,1),%rsi
|
||||
shrq $3,%r9
|
||||
leaq (,%rax,4),%rdi
|
||||
andq %rax,%r9
|
||||
leaq (,%rax,8),%r12
|
||||
sarq $63,%rax
|
||||
leaq (%r9,%r9,1),%r10
|
||||
sarq $63,%rsi
|
||||
leaq (,%r9,4),%r11
|
||||
andq %rbp,%rax
|
||||
sarq $63,%rdi
|
||||
movq %rax,%rdx
|
||||
shlq $63,%rax
|
||||
andq %rbp,%rsi
|
||||
shrq $1,%rdx
|
||||
movq %rsi,%rcx
|
||||
shlq $62,%rsi
|
||||
andq %rbp,%rdi
|
||||
shrq $2,%rcx
|
||||
xorq %rsi,%rax
|
||||
movq %rdi,%rbx
|
||||
shlq $61,%rdi
|
||||
xorq %rcx,%rdx
|
||||
shrq $3,%rbx
|
||||
xorq %rdi,%rax
|
||||
xorq %rbx,%rdx
|
||||
|
||||
# Build the table: entry i, stored at 8*i(%rsp), is the XOR of the 1x/2x/4x/8x
# multiples selected by the bits of i (entry 0 is zero).
movq %r9,%r13
|
||||
movq $0,0(%rsp)
|
||||
xorq %r10,%r13
|
||||
movq %r9,8(%rsp)
|
||||
movq %r11,%r14
|
||||
movq %r10,16(%rsp)
|
||||
xorq %r12,%r14
|
||||
movq %r13,24(%rsp)
|
||||
|
||||
xorq %r11,%r9
|
||||
movq %r11,32(%rsp)
|
||||
xorq %r11,%r10
|
||||
movq %r9,40(%rsp)
|
||||
xorq %r11,%r13
|
||||
movq %r10,48(%rsp)
|
||||
xorq %r14,%r9
|
||||
movq %r13,56(%rsp)
|
||||
xorq %r14,%r10
|
||||
|
||||
movq %r12,64(%rsp)
|
||||
xorq %r14,%r13
|
||||
movq %r9,72(%rsp)
|
||||
xorq %r11,%r9
|
||||
movq %r10,80(%rsp)
|
||||
xorq %r11,%r10
|
||||
movq %r13,88(%rsp)
|
||||
|
||||
xorq %r11,%r13
|
||||
movq %r14,96(%rsp)
|
||||
movq %r8,%rsi
|
||||
movq %r9,104(%rsp)
|
||||
andq %rbp,%rsi
|
||||
movq %r10,112(%rsp)
|
||||
shrq $4,%rbp
|
||||
movq %r13,120(%rsp)
|
||||
movq %r8,%rdi
|
||||
andq %rbp,%rdi
|
||||
shrq $4,%rbp
|
||||
|
||||
# Walk b four bits at a time (%rbp is shifted right by 4 per step, masked with %r8).
# Table entries for even-numbered steps are accumulated in %xmm0 using byte
# shifts (pslldq); entries for odd-numbered steps are accumulated in
# %rax:%rdx using shlq/shrq pairs whose counts add up to 64.
movq (%rsp,%rsi,8),%xmm0
|
||||
movq %r8,%rsi
|
||||
andq %rbp,%rsi
|
||||
shrq $4,%rbp
|
||||
movq (%rsp,%rdi,8),%rcx
|
||||
movq %r8,%rdi
|
||||
movq %rcx,%rbx
|
||||
shlq $4,%rcx
|
||||
andq %rbp,%rdi
|
||||
movq (%rsp,%rsi,8),%xmm1
|
||||
shrq $60,%rbx
|
||||
xorq %rcx,%rax
|
||||
pslldq $1,%xmm1
|
||||
movq %r8,%rsi
|
||||
shrq $4,%rbp
|
||||
xorq %rbx,%rdx
|
||||
andq %rbp,%rsi
|
||||
shrq $4,%rbp
|
||||
pxor %xmm1,%xmm0
|
||||
movq (%rsp,%rdi,8),%rcx
|
||||
movq %r8,%rdi
|
||||
movq %rcx,%rbx
|
||||
shlq $12,%rcx
|
||||
andq %rbp,%rdi
|
||||
movq (%rsp,%rsi,8),%xmm1
|
||||
shrq $52,%rbx
|
||||
xorq %rcx,%rax
|
||||
pslldq $2,%xmm1
|
||||
movq %r8,%rsi
|
||||
shrq $4,%rbp
|
||||
xorq %rbx,%rdx
|
||||
andq %rbp,%rsi
|
||||
shrq $4,%rbp
|
||||
pxor %xmm1,%xmm0
|
||||
movq (%rsp,%rdi,8),%rcx
|
||||
movq %r8,%rdi
|
||||
movq %rcx,%rbx
|
||||
shlq $20,%rcx
|
||||
andq %rbp,%rdi
|
||||
movq (%rsp,%rsi,8),%xmm1
|
||||
shrq $44,%rbx
|
||||
xorq %rcx,%rax
|
||||
pslldq $3,%xmm1
|
||||
movq %r8,%rsi
|
||||
shrq $4,%rbp
|
||||
xorq %rbx,%rdx
|
||||
andq %rbp,%rsi
|
||||
shrq $4,%rbp
|
||||
pxor %xmm1,%xmm0
|
||||
movq (%rsp,%rdi,8),%rcx
|
||||
movq %r8,%rdi
|
||||
movq %rcx,%rbx
|
||||
shlq $28,%rcx
|
||||
andq %rbp,%rdi
|
||||
movq (%rsp,%rsi,8),%xmm1
|
||||
shrq $36,%rbx
|
||||
xorq %rcx,%rax
|
||||
pslldq $4,%xmm1
|
||||
movq %r8,%rsi
|
||||
shrq $4,%rbp
|
||||
xorq %rbx,%rdx
|
||||
andq %rbp,%rsi
|
||||
shrq $4,%rbp
|
||||
pxor %xmm1,%xmm0
|
||||
movq (%rsp,%rdi,8),%rcx
|
||||
movq %r8,%rdi
|
||||
movq %rcx,%rbx
|
||||
shlq $36,%rcx
|
||||
andq %rbp,%rdi
|
||||
movq (%rsp,%rsi,8),%xmm1
|
||||
shrq $28,%rbx
|
||||
xorq %rcx,%rax
|
||||
pslldq $5,%xmm1
|
||||
movq %r8,%rsi
|
||||
shrq $4,%rbp
|
||||
xorq %rbx,%rdx
|
||||
andq %rbp,%rsi
|
||||
shrq $4,%rbp
|
||||
pxor %xmm1,%xmm0
|
||||
movq (%rsp,%rdi,8),%rcx
|
||||
movq %r8,%rdi
|
||||
movq %rcx,%rbx
|
||||
shlq $44,%rcx
|
||||
andq %rbp,%rdi
|
||||
movq (%rsp,%rsi,8),%xmm1
|
||||
shrq $20,%rbx
|
||||
xorq %rcx,%rax
|
||||
pslldq $6,%xmm1
|
||||
movq %r8,%rsi
|
||||
shrq $4,%rbp
|
||||
xorq %rbx,%rdx
|
||||
andq %rbp,%rsi
|
||||
shrq $4,%rbp
|
||||
pxor %xmm1,%xmm0
|
||||
movq (%rsp,%rdi,8),%rcx
|
||||
movq %r8,%rdi
|
||||
movq %rcx,%rbx
|
||||
shlq $52,%rcx
|
||||
andq %rbp,%rdi
|
||||
movq (%rsp,%rsi,8),%xmm1
|
||||
shrq $12,%rbx
|
||||
xorq %rcx,%rax
|
||||
pslldq $7,%xmm1
|
||||
movq %r8,%rsi
|
||||
shrq $4,%rbp
|
||||
xorq %rbx,%rdx
|
||||
andq %rbp,%rsi
|
||||
shrq $4,%rbp
|
||||
pxor %xmm1,%xmm0
|
||||
movq (%rsp,%rdi,8),%rcx
|
||||
movq %rcx,%rbx
|
||||
shlq $60,%rcx
|
||||
# Hand-encoded: movq %xmm0,%rsi (low half of the SSE accumulator)
.byte 102,72,15,126,198
|
||||
shrq $4,%rbx
|
||||
xorq %rcx,%rax
|
||||
psrldq $8,%xmm0
|
||||
xorq %rbx,%rdx
|
||||
# Hand-encoded: movq %xmm0,%rdi (high half, after the 8-byte right shift)
.byte 102,72,15,126,199
|
||||
# Fold the SSE accumulator into the integer result.
xorq %rsi,%rax
|
||||
xorq %rdi,%rdx
|
||||
|
||||
addq $128+8,%rsp
|
||||
# Hand-encoded: rep ret
.byte 0xf3,0xc3
|
||||
.Lend_mul_1x1:
|
||||
.size _mul_1x1,.-_mul_1x1
|
||||
|
||||
# bn_GF2m_mul_2x2: 128x128 -> 256-bit carry-less multiply.
# In:  %rdi = pointer to four result qwords,
#      %rsi and %rdx = the two qwords of the first operand (%rsi is the high one:
#      its product lands in result words 2..3), %rcx and %r8 = the two qwords of
#      the second operand, in the same high/low order.
# Out: result stored at 0..31(%rdi).
# Two implementations are selected by bit 33 of OPENSSL_ia32cap_P
# (presumably the PCLMULQDQ capability flag -- confirm against the definition
# of OPENSSL_ia32cap_P): bit set -> hand-encoded pclmulqdq path,
# bit clear -> three calls to _mul_1x1.
.globl bn_GF2m_mul_2x2
|
||||
.type bn_GF2m_mul_2x2,@function
|
||||
.align 16
|
||||
bn_GF2m_mul_2x2:
|
||||
movq OPENSSL_ia32cap_P(%rip),%rax
|
||||
btq $33,%rax
|
||||
jnc .Lvanilla_mul_2x2
|
||||
|
||||
# Hand-encoded: movq %rsi,%xmm0 / movq %rcx,%xmm1 / movq %rdx,%xmm2 / movq %r8,%xmm3
.byte 102,72,15,110,198
|
||||
.byte 102,72,15,110,201
|
||||
.byte 102,72,15,110,210
|
||||
.byte 102,73,15,110,216
|
||||
movdqa %xmm0,%xmm4
|
||||
movdqa %xmm1,%xmm5
|
||||
# Hand-encoded: pclmulqdq $0,%xmm1,%xmm0   (high * high)
.byte 102,15,58,68,193,0
|
||||
pxor %xmm2,%xmm4
|
||||
pxor %xmm3,%xmm5
|
||||
# Hand-encoded: pclmulqdq $0,%xmm3,%xmm2   (low * low)
.byte 102,15,58,68,211,0
|
||||
# Hand-encoded: pclmulqdq $0,%xmm5,%xmm4   ((high^low) * (high^low))
.byte 102,15,58,68,229,0
|
||||
# Middle term = xmm4 ^ xmm0 ^ xmm2; its halves are folded into the low and
# high products before both are stored.
xorps %xmm0,%xmm4
|
||||
xorps %xmm2,%xmm4
|
||||
movdqa %xmm4,%xmm5
|
||||
pslldq $8,%xmm4
|
||||
psrldq $8,%xmm5
|
||||
pxor %xmm4,%xmm2
|
||||
pxor %xmm5,%xmm0
|
||||
movdqu %xmm2,0(%rdi)
|
||||
movdqu %xmm0,16(%rdi)
|
||||
# Hand-encoded: rep ret
.byte 0xf3,0xc3
|
||||
|
||||
.align 16
|
||||
.Lvanilla_mul_2x2:
|
||||
# Frame layout (136 bytes): 0..31 partial products, 32..71 saved arguments,
# 80..119 saved %r14/%r13/%r12/%rbp/%rbx (all of which _mul_1x1 overwrites).
leaq -136(%rsp),%rsp
|
||||
movq %r14,80(%rsp)
|
||||
movq %r13,88(%rsp)
|
||||
movq %r12,96(%rsp)
|
||||
movq %rbp,104(%rsp)
|
||||
movq %rbx,112(%rsp)
|
||||
.Lbody_mul_2x2:
|
||||
movq %rdi,32(%rsp)
|
||||
movq %rsi,40(%rsp)
|
||||
movq %rdx,48(%rsp)
|
||||
movq %rcx,56(%rsp)
|
||||
movq %r8,64(%rsp)
|
||||
|
||||
# First product: %rsi * %rcx, with %r8 = 15 as the nibble mask for _mul_1x1.
movq $15,%r8
|
||||
movq %rsi,%rax
|
||||
movq %rcx,%rbp
|
||||
call _mul_1x1
|
||||
|
||||
movq %rax,16(%rsp)
|
||||
movq %rdx,24(%rsp)
|
||||
|
||||
# Second product: original %rdx * original %r8.
movq 48(%rsp),%rax
|
||||
movq 64(%rsp),%rbp
|
||||
call _mul_1x1
|
||||
|
||||
movq %rax,0(%rsp)
|
||||
movq %rdx,8(%rsp)
|
||||
|
||||
# Third product: (XOR of the first operand's words) * (XOR of the second's).
movq 40(%rsp),%rax
|
||||
movq 56(%rsp),%rbp
|
||||
xorq 48(%rsp),%rax
|
||||
xorq 64(%rsp),%rbp
|
||||
call _mul_1x1
|
||||
|
||||
movq 0(%rsp),%rbx
|
||||
movq 8(%rsp),%rcx
|
||||
movq 16(%rsp),%rdi
|
||||
movq 24(%rsp),%rsi
|
||||
# %rbp = result pointer saved from %rdi on entry.
movq 32(%rsp),%rbp
|
||||
|
||||
# Recombine the three partial products with XORs and store the four result words.
xorq %rdx,%rax
|
||||
xorq %rcx,%rdx
|
||||
xorq %rbx,%rax
|
||||
movq %rbx,0(%rbp)
|
||||
xorq %rdi,%rdx
|
||||
movq %rsi,24(%rbp)
|
||||
xorq %rsi,%rax
|
||||
xorq %rsi,%rdx
|
||||
xorq %rdx,%rax
|
||||
movq %rdx,16(%rbp)
|
||||
movq %rax,8(%rbp)
|
||||
|
||||
# Restore the registers saved on entry and release the frame.
movq 80(%rsp),%r14
|
||||
movq 88(%rsp),%r13
|
||||
movq 96(%rsp),%r12
|
||||
movq 104(%rsp),%rbp
|
||||
movq 112(%rsp),%rbx
|
||||
leaq 136(%rsp),%rsp
|
||||
# Hand-encoded: rep ret
.byte 0xf3,0xc3
|
||||
.Lend_mul_2x2:
|
||||
.size bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2
|
||||
# NUL-terminated ASCII identification string:
# "GF(2^m) Multiplication for x86_64, CRYPTOGAMS by <appro@openssl.org>"
.byte 71,70,40,50,94,109,41,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
.align 16
|
785
deps/openssl/asm/x64-elf-gas/bn/x86_64-mont5.s
vendored
Normal file
785
deps/openssl/asm/x64-elf-gas/bn/x86_64-mont5.s
vendored
Normal file
@ -0,0 +1,785 @@
|
||||
.text
|
||||
|
||||
|
||||
# bn_mul_mont_gather5: Montgomery multiplication in which each word of the
# second operand is gathered from a table.
# Registers as used below:
#   %rdi = result words, %rsi = first operand words (a), %rdx = table base,
#   %rcx = modulus words (n), %r8 = pointer to the n0 word, %r9d = word count (num),
#   8(%rsp) on entry = 32-bit table index (called "power" in the comments below).
# Returns 1 in %rax.
# When num is a multiple of 4 and at least 8 (unsigned compare) control jumps
# to the 4x-unrolled code at .Lmul4x_enter; otherwise the generic path follows.
.globl bn_mul_mont_gather5
|
||||
.type bn_mul_mont_gather5,@function
|
||||
.align 64
|
||||
bn_mul_mont_gather5:
|
||||
testl $3,%r9d
|
||||
jnz .Lmul_enter
|
||||
cmpl $8,%r9d
|
||||
jb .Lmul_enter
|
||||
jmp .Lmul4x_enter
|
||||
|
||||
.align 16
|
||||
.Lmul_enter:
|
||||
# Zero-extend num and fetch the stack-passed index before the pushes move %rsp.
movl %r9d,%r9d
|
||||
movl 8(%rsp),%r10d
|
||||
pushq %rbx
|
||||
pushq %rbp
|
||||
pushq %r12
|
||||
pushq %r13
|
||||
pushq %r14
|
||||
pushq %r15
|
||||
# Reserve num+2 qwords for the temporary vector tp[], round %rsp down to a
# 1024-byte boundary, and store the pre-allocation %rsp at tp[num+1].
movq %rsp,%rax
|
||||
leaq 2(%r9),%r11
|
||||
negq %r11
|
||||
leaq (%rsp,%r11,8),%rsp
|
||||
andq $-1024,%rsp
|
||||
|
||||
movq %rax,8(%rsp,%r9,8)
|
||||
.Lmul_body:
|
||||
# %r11 = power & 7, %r10 = ~(power >> 3) & 3.
# %r12 = table + 96 + 8*(power & 7).
# %xmm4..%xmm7 = four consecutive qwords of .Lmagic_masks starting at index %r10.
# Each gather loads four candidates (at -96/-32/32/96(%r12)), ANDs each with
# its mask and ORs them, so the kept word is selected by mask rather than by
# load address.
movq %rdx,%r12
|
||||
movq %r10,%r11
|
||||
shrq $3,%r10
|
||||
andq $7,%r11
|
||||
notq %r10
|
||||
leaq .Lmagic_masks(%rip),%rax
|
||||
andq $3,%r10
|
||||
leaq 96(%r12,%r11,8),%r12
|
||||
movq 0(%rax,%r10,8),%xmm4
|
||||
movq 8(%rax,%r10,8),%xmm5
|
||||
movq 16(%rax,%r10,8),%xmm6
|
||||
movq 24(%rax,%r10,8),%xmm7
|
||||
|
||||
movq -96(%r12),%xmm0
|
||||
movq -32(%r12),%xmm1
|
||||
pand %xmm4,%xmm0
|
||||
movq 32(%r12),%xmm2
|
||||
pand %xmm5,%xmm1
|
||||
movq 96(%r12),%xmm3
|
||||
pand %xmm6,%xmm2
|
||||
por %xmm1,%xmm0
|
||||
pand %xmm7,%xmm3
|
||||
por %xmm2,%xmm0
|
||||
# Successive multiplier words are 256 bytes apart in the table.
leaq 256(%r12),%r12
|
||||
por %xmm3,%xmm0
|
||||
|
||||
# Hand-encoded: movq %xmm0,%rbx  (%rbx = current multiplier word)
.byte 102,72,15,126,195
|
||||
|
||||
# %r8 = n0 value, %rax = a[0].
movq (%r8),%r8
|
||||
movq (%rsi),%rax
|
||||
|
||||
# %r14 = outer index, %r15 = inner index.
xorq %r14,%r14
|
||||
xorq %r15,%r15
|
||||
|
||||
# Gather of the next multiplier word is interleaved with the arithmetic.
movq -96(%r12),%xmm0
|
||||
movq -32(%r12),%xmm1
|
||||
pand %xmm4,%xmm0
|
||||
movq 32(%r12),%xmm2
|
||||
pand %xmm5,%xmm1
|
||||
|
||||
movq %r8,%rbp
|
||||
mulq %rbx
|
||||
movq %rax,%r10
|
||||
movq (%rcx),%rax
|
||||
|
||||
movq 96(%r12),%xmm3
|
||||
pand %xmm6,%xmm2
|
||||
por %xmm1,%xmm0
|
||||
pand %xmm7,%xmm3
|
||||
|
||||
# %rbp = n0 * (low word of a[0]*b): the per-pass reduction multiplier.
imulq %r10,%rbp
|
||||
movq %rdx,%r11
|
||||
|
||||
por %xmm2,%xmm0
|
||||
leaq 256(%r12),%r12
|
||||
por %xmm3,%xmm0
|
||||
|
||||
mulq %rbp
|
||||
addq %rax,%r10
|
||||
movq 8(%rsi),%rax
|
||||
adcq $0,%rdx
|
||||
movq %rdx,%r13
|
||||
|
||||
leaq 1(%r15),%r15
|
||||
jmp .L1st_enter
|
||||
|
||||
.align 16
|
||||
# First pass: accumulate a[j]*%rbx and n[j]*%rbp and write the sums to tp[].
.L1st:
|
||||
addq %rax,%r13
|
||||
movq (%rsi,%r15,8),%rax
|
||||
adcq $0,%rdx
|
||||
addq %r11,%r13
|
||||
movq %r10,%r11
|
||||
adcq $0,%rdx
|
||||
movq %r13,-16(%rsp,%r15,8)
|
||||
movq %rdx,%r13
|
||||
|
||||
.L1st_enter:
|
||||
mulq %rbx
|
||||
addq %rax,%r11
|
||||
movq (%rcx,%r15,8),%rax
|
||||
adcq $0,%rdx
|
||||
leaq 1(%r15),%r15
|
||||
movq %rdx,%r10
|
||||
|
||||
mulq %rbp
|
||||
cmpq %r9,%r15
|
||||
jne .L1st
|
||||
|
||||
# Hand-encoded: movq %xmm0,%rbx  (multiplier word for the next pass)
.byte 102,72,15,126,195
|
||||
|
||||
addq %rax,%r13
|
||||
movq (%rsi),%rax
|
||||
adcq $0,%rdx
|
||||
addq %r11,%r13
|
||||
adcq $0,%rdx
|
||||
movq %r13,-16(%rsp,%r15,8)
|
||||
movq %rdx,%r13
|
||||
movq %r10,%r11
|
||||
|
||||
# Store the top word tp[num-1] and the carry-out word tp[num].
xorq %rdx,%rdx
|
||||
addq %r11,%r13
|
||||
adcq $0,%rdx
|
||||
movq %r13,-8(%rsp,%r9,8)
|
||||
movq %rdx,(%rsp,%r9,8)
|
||||
|
||||
leaq 1(%r14),%r14
|
||||
jmp .Louter
|
||||
|
||||
.align 16
|
||||
# Remaining passes (outer index 1 .. num-1): same as the first pass but the
# previous tp[] contents are added in as well.
.Louter:
|
||||
xorq %r15,%r15
|
||||
movq %r8,%rbp
|
||||
movq (%rsp),%r10
|
||||
|
||||
movq -96(%r12),%xmm0
|
||||
movq -32(%r12),%xmm1
|
||||
pand %xmm4,%xmm0
|
||||
movq 32(%r12),%xmm2
|
||||
pand %xmm5,%xmm1
|
||||
|
||||
mulq %rbx
|
||||
addq %rax,%r10
|
||||
movq (%rcx),%rax
|
||||
adcq $0,%rdx
|
||||
|
||||
movq 96(%r12),%xmm3
|
||||
pand %xmm6,%xmm2
|
||||
por %xmm1,%xmm0
|
||||
pand %xmm7,%xmm3
|
||||
|
||||
imulq %r10,%rbp
|
||||
movq %rdx,%r11
|
||||
|
||||
por %xmm2,%xmm0
|
||||
leaq 256(%r12),%r12
|
||||
por %xmm3,%xmm0
|
||||
|
||||
mulq %rbp
|
||||
addq %rax,%r10
|
||||
movq 8(%rsi),%rax
|
||||
adcq $0,%rdx
|
||||
movq 8(%rsp),%r10
|
||||
movq %rdx,%r13
|
||||
|
||||
leaq 1(%r15),%r15
|
||||
jmp .Linner_enter
|
||||
|
||||
.align 16
|
||||
.Linner:
|
||||
addq %rax,%r13
|
||||
movq (%rsi,%r15,8),%rax
|
||||
adcq $0,%rdx
|
||||
addq %r10,%r13
|
||||
movq (%rsp,%r15,8),%r10
|
||||
adcq $0,%rdx
|
||||
movq %r13,-16(%rsp,%r15,8)
|
||||
movq %rdx,%r13
|
||||
|
||||
.Linner_enter:
|
||||
mulq %rbx
|
||||
addq %rax,%r11
|
||||
movq (%rcx,%r15,8),%rax
|
||||
adcq $0,%rdx
|
||||
addq %r11,%r10
|
||||
movq %rdx,%r11
|
||||
adcq $0,%r11
|
||||
leaq 1(%r15),%r15
|
||||
|
||||
mulq %rbp
|
||||
cmpq %r9,%r15
|
||||
jne .Linner
|
||||
|
||||
# Hand-encoded: movq %xmm0,%rbx  (multiplier word for the next pass)
.byte 102,72,15,126,195
|
||||
|
||||
addq %rax,%r13
|
||||
movq (%rsi),%rax
|
||||
adcq $0,%rdx
|
||||
addq %r10,%r13
|
||||
movq (%rsp,%r15,8),%r10
|
||||
adcq $0,%rdx
|
||||
movq %r13,-16(%rsp,%r15,8)
|
||||
movq %rdx,%r13
|
||||
|
||||
xorq %rdx,%rdx
|
||||
addq %r11,%r13
|
||||
adcq $0,%rdx
|
||||
addq %r10,%r13
|
||||
adcq $0,%rdx
|
||||
movq %r13,-8(%rsp,%r9,8)
|
||||
movq %rdx,(%rsp,%r9,8)
|
||||
|
||||
leaq 1(%r14),%r14
|
||||
cmpq %r9,%r14
|
||||
jl .Louter
|
||||
|
||||
# Final step: result = tp - n, with the borrow carried in CF. The xorq clears
# CF before the loop; the movq/leaq/decq inside the loop leave CF untouched.
xorq %r14,%r14
|
||||
movq (%rsp),%rax
|
||||
leaq (%rsp),%rsi
|
||||
movq %r9,%r15
|
||||
jmp .Lsub
|
||||
.align 16
|
||||
.Lsub: sbbq (%rcx,%r14,8),%rax
|
||||
movq %rax,(%rdi,%r14,8)
|
||||
movq 8(%rsi,%r14,8),%rax
|
||||
leaq 1(%r14),%r14
|
||||
decq %r15
|
||||
jnz .Lsub
|
||||
|
||||
# %rax = tp[num] - borrow is used as a select mask:
# %rsi = (tp address & mask) | (result address & ~mask), i.e. the copy source.
sbbq $0,%rax
|
||||
xorq %r14,%r14
|
||||
andq %rax,%rsi
|
||||
notq %rax
|
||||
movq %rdi,%rcx
|
||||
andq %rax,%rcx
|
||||
movq %r9,%r15
|
||||
orq %rcx,%rsi
|
||||
.align 16
|
||||
# Copy the selected vector into the result and overwrite each tp[] word with
# its own index.
.Lcopy:
|
||||
movq (%rsi,%r14,8),%rax
|
||||
movq %r14,(%rsp,%r14,8)
|
||||
movq %rax,(%rdi,%r14,8)
|
||||
leaq 1(%r14),%r14
|
||||
subq $1,%r15
|
||||
jnz .Lcopy
|
||||
|
||||
# Reload the %rsp value saved at tp[num+1], set the return value to 1 and
# restore the six registers pushed in the prologue.
movq 8(%rsp,%r9,8),%rsi
|
||||
movq $1,%rax
|
||||
movq (%rsi),%r15
|
||||
movq 8(%rsi),%r14
|
||||
movq 16(%rsi),%r13
|
||||
movq 24(%rsi),%r12
|
||||
movq 32(%rsi),%rbp
|
||||
movq 40(%rsi),%rbx
|
||||
leaq 48(%rsi),%rsp
|
||||
.Lmul_epilogue:
|
||||
# Hand-encoded: rep ret
.byte 0xf3,0xc3
|
||||
.size bn_mul_mont_gather5,.-bn_mul_mont_gather5
|
||||
# bn_mul4x_mont_gather5: 4x-unrolled variant of bn_mul_mont_gather5, reached
# through .Lmul4x_enter when num is a multiple of 4 and at least 8.
# Same register arguments as bn_mul_mont_gather5:
#   %rdi = result, %rsi = a, %rdx = table base, %rcx = n, %r8 = pointer to n0,
#   %r9d = num, 8(%rsp) on entry = 32-bit table index.
# Returns 1 in %rax. No .globl is emitted for this symbol in this file.
.type bn_mul4x_mont_gather5,@function
|
||||
.align 16
|
||||
bn_mul4x_mont_gather5:
|
||||
.Lmul4x_enter:
|
||||
movl %r9d,%r9d
|
||||
movl 8(%rsp),%r10d
|
||||
pushq %rbx
|
||||
pushq %rbp
|
||||
pushq %r12
|
||||
pushq %r13
|
||||
pushq %r14
|
||||
pushq %r15
|
||||
# Reserve num+4 qwords for tp[], round %rsp down to a 1024-byte boundary and
# store the pre-allocation %rsp at tp[num+1].
movq %rsp,%rax
|
||||
leaq 4(%r9),%r11
|
||||
negq %r11
|
||||
leaq (%rsp,%r11,8),%rsp
|
||||
andq $-1024,%rsp
|
||||
|
||||
movq %rax,8(%rsp,%r9,8)
|
||||
.Lmul4x_body:
|
||||
# %rdi is reused as an accumulator below, so the result pointer is parked at tp[num+2].
movq %rdi,16(%rsp,%r9,8)
|
||||
# Mask setup and first gather: %r11 = index & 7, %r10 = ~(index >> 3) & 3,
# %r12 = table + 96 + 8*(index & 7), %xmm4..%xmm7 = four consecutive qwords of
# .Lmagic_masks starting at index %r10.
movq %rdx,%r12
|
||||
movq %r10,%r11
|
||||
shrq $3,%r10
|
||||
andq $7,%r11
|
||||
notq %r10
|
||||
leaq .Lmagic_masks(%rip),%rax
|
||||
andq $3,%r10
|
||||
leaq 96(%r12,%r11,8),%r12
|
||||
movq 0(%rax,%r10,8),%xmm4
|
||||
movq 8(%rax,%r10,8),%xmm5
|
||||
movq 16(%rax,%r10,8),%xmm6
|
||||
movq 24(%rax,%r10,8),%xmm7
|
||||
|
||||
movq -96(%r12),%xmm0
|
||||
movq -32(%r12),%xmm1
|
||||
pand %xmm4,%xmm0
|
||||
movq 32(%r12),%xmm2
|
||||
pand %xmm5,%xmm1
|
||||
movq 96(%r12),%xmm3
|
||||
pand %xmm6,%xmm2
|
||||
por %xmm1,%xmm0
|
||||
pand %xmm7,%xmm3
|
||||
por %xmm2,%xmm0
|
||||
leaq 256(%r12),%r12
|
||||
por %xmm3,%xmm0
|
||||
|
||||
# Hand-encoded: movq %xmm0,%rbx  (%rbx = current multiplier word)
.byte 102,72,15,126,195
|
||||
# %r8 = n0 value, %rax = a[0].
movq (%r8),%r8
|
||||
movq (%rsi),%rax
|
||||
|
||||
# %r14 = outer index, %r15 = inner index.
xorq %r14,%r14
|
||||
xorq %r15,%r15
|
||||
|
||||
movq -96(%r12),%xmm0
|
||||
movq -32(%r12),%xmm1
|
||||
pand %xmm4,%xmm0
|
||||
movq 32(%r12),%xmm2
|
||||
pand %xmm5,%xmm1
|
||||
|
||||
movq %r8,%rbp
|
||||
mulq %rbx
|
||||
movq %rax,%r10
|
||||
movq (%rcx),%rax
|
||||
|
||||
movq 96(%r12),%xmm3
|
||||
pand %xmm6,%xmm2
|
||||
por %xmm1,%xmm0
|
||||
pand %xmm7,%xmm3
|
||||
|
||||
# %rbp = n0 * (low word of a[0]*b): the per-pass reduction multiplier.
imulq %r10,%rbp
|
||||
movq %rdx,%r11
|
||||
|
||||
por %xmm2,%xmm0
|
||||
leaq 256(%r12),%r12
|
||||
por %xmm3,%xmm0
|
||||
|
||||
mulq %rbp
|
||||
addq %rax,%r10
|
||||
movq 8(%rsi),%rax
|
||||
adcq $0,%rdx
|
||||
movq %rdx,%rdi
|
||||
|
||||
mulq %rbx
|
||||
addq %rax,%r11
|
||||
movq 8(%rcx),%rax
|
||||
adcq $0,%rdx
|
||||
movq %rdx,%r10
|
||||
|
||||
mulq %rbp
|
||||
addq %rax,%rdi
|
||||
movq 16(%rsi),%rax
|
||||
adcq $0,%rdx
|
||||
addq %r11,%rdi
|
||||
leaq 4(%r15),%r15
|
||||
adcq $0,%rdx
|
||||
movq %rdi,(%rsp)
|
||||
movq %rdx,%r13
|
||||
jmp .L1st4x
|
||||
.align 16
|
||||
# First pass, four words of a[] and n[] per iteration; sums are written to tp[].
.L1st4x:
|
||||
mulq %rbx
|
||||
addq %rax,%r10
|
||||
movq -16(%rcx,%r15,8),%rax
|
||||
adcq $0,%rdx
|
||||
movq %rdx,%r11
|
||||
|
||||
mulq %rbp
|
||||
addq %rax,%r13
|
||||
movq -8(%rsi,%r15,8),%rax
|
||||
adcq $0,%rdx
|
||||
addq %r10,%r13
|
||||
adcq $0,%rdx
|
||||
movq %r13,-24(%rsp,%r15,8)
|
||||
movq %rdx,%rdi
|
||||
|
||||
mulq %rbx
|
||||
addq %rax,%r11
|
||||
movq -8(%rcx,%r15,8),%rax
|
||||
adcq $0,%rdx
|
||||
movq %rdx,%r10
|
||||
|
||||
mulq %rbp
|
||||
addq %rax,%rdi
|
||||
movq (%rsi,%r15,8),%rax
|
||||
adcq $0,%rdx
|
||||
addq %r11,%rdi
|
||||
adcq $0,%rdx
|
||||
movq %rdi,-16(%rsp,%r15,8)
|
||||
movq %rdx,%r13
|
||||
|
||||
mulq %rbx
|
||||
addq %rax,%r10
|
||||
movq (%rcx,%r15,8),%rax
|
||||
adcq $0,%rdx
|
||||
movq %rdx,%r11
|
||||
|
||||
mulq %rbp
|
||||
addq %rax,%r13
|
||||
movq 8(%rsi,%r15,8),%rax
|
||||
adcq $0,%rdx
|
||||
addq %r10,%r13
|
||||
adcq $0,%rdx
|
||||
movq %r13,-8(%rsp,%r15,8)
|
||||
movq %rdx,%rdi
|
||||
|
||||
mulq %rbx
|
||||
addq %rax,%r11
|
||||
movq 8(%rcx,%r15,8),%rax
|
||||
adcq $0,%rdx
|
||||
leaq 4(%r15),%r15
|
||||
movq %rdx,%r10
|
||||
|
||||
mulq %rbp
|
||||
addq %rax,%rdi
|
||||
movq -16(%rsi,%r15,8),%rax
|
||||
adcq $0,%rdx
|
||||
addq %r11,%rdi
|
||||
adcq $0,%rdx
|
||||
movq %rdi,-32(%rsp,%r15,8)
|
||||
movq %rdx,%r13
|
||||
cmpq %r9,%r15
|
||||
jl .L1st4x
|
||||
|
||||
# Tail of the first pass: the last two words, then the top word and carry.
mulq %rbx
|
||||
addq %rax,%r10
|
||||
movq -16(%rcx,%r15,8),%rax
|
||||
adcq $0,%rdx
|
||||
movq %rdx,%r11
|
||||
|
||||
mulq %rbp
|
||||
addq %rax,%r13
|
||||
movq -8(%rsi,%r15,8),%rax
|
||||
adcq $0,%rdx
|
||||
addq %r10,%r13
|
||||
adcq $0,%rdx
|
||||
movq %r13,-24(%rsp,%r15,8)
|
||||
movq %rdx,%rdi
|
||||
|
||||
mulq %rbx
|
||||
addq %rax,%r11
|
||||
movq -8(%rcx,%r15,8),%rax
|
||||
adcq $0,%rdx
|
||||
movq %rdx,%r10
|
||||
|
||||
mulq %rbp
|
||||
addq %rax,%rdi
|
||||
movq (%rsi),%rax
|
||||
adcq $0,%rdx
|
||||
addq %r11,%rdi
|
||||
adcq $0,%rdx
|
||||
movq %rdi,-16(%rsp,%r15,8)
|
||||
movq %rdx,%r13
|
||||
|
||||
# Hand-encoded: movq %xmm0,%rbx  (multiplier word for the next pass)
.byte 102,72,15,126,195
|
||||
|
||||
xorq %rdi,%rdi
|
||||
addq %r10,%r13
|
||||
adcq $0,%rdi
|
||||
movq %r13,-8(%rsp,%r15,8)
|
||||
movq %rdi,(%rsp,%r15,8)
|
||||
|
||||
leaq 1(%r14),%r14
|
||||
.align 4
|
||||
# Remaining passes: as above, additionally adding the previous tp[] words.
.Louter4x:
|
||||
xorq %r15,%r15
|
||||
movq -96(%r12),%xmm0
|
||||
movq -32(%r12),%xmm1
|
||||
pand %xmm4,%xmm0
|
||||
movq 32(%r12),%xmm2
|
||||
pand %xmm5,%xmm1
|
||||
|
||||
movq (%rsp),%r10
|
||||
movq %r8,%rbp
|
||||
mulq %rbx
|
||||
addq %rax,%r10
|
||||
movq (%rcx),%rax
|
||||
adcq $0,%rdx
|
||||
|
||||
movq 96(%r12),%xmm3
|
||||
pand %xmm6,%xmm2
|
||||
por %xmm1,%xmm0
|
||||
pand %xmm7,%xmm3
|
||||
|
||||
imulq %r10,%rbp
|
||||
movq %rdx,%r11
|
||||
|
||||
por %xmm2,%xmm0
|
||||
leaq 256(%r12),%r12
|
||||
por %xmm3,%xmm0
|
||||
|
||||
mulq %rbp
|
||||
addq %rax,%r10
|
||||
movq 8(%rsi),%rax
|
||||
adcq $0,%rdx
|
||||
movq %rdx,%rdi
|
||||
|
||||
mulq %rbx
|
||||
addq %rax,%r11
|
||||
movq 8(%rcx),%rax
|
||||
adcq $0,%rdx
|
||||
addq 8(%rsp),%r11
|
||||
adcq $0,%rdx
|
||||
movq %rdx,%r10
|
||||
|
||||
mulq %rbp
|
||||
addq %rax,%rdi
|
||||
movq 16(%rsi),%rax
|
||||
adcq $0,%rdx
|
||||
addq %r11,%rdi
|
||||
leaq 4(%r15),%r15
|
||||
adcq $0,%rdx
|
||||
movq %rdx,%r13
|
||||
jmp .Linner4x
|
||||
.align 16
|
||||
.Linner4x:
|
||||
mulq %rbx
|
||||
addq %rax,%r10
|
||||
movq -16(%rcx,%r15,8),%rax
|
||||
adcq $0,%rdx
|
||||
addq -16(%rsp,%r15,8),%r10
|
||||
adcq $0,%rdx
|
||||
movq %rdx,%r11
|
||||
|
||||
mulq %rbp
|
||||
addq %rax,%r13
|
||||
movq -8(%rsi,%r15,8),%rax
|
||||
adcq $0,%rdx
|
||||
addq %r10,%r13
|
||||
adcq $0,%rdx
|
||||
movq %rdi,-32(%rsp,%r15,8)
|
||||
movq %rdx,%rdi
|
||||
|
||||
mulq %rbx
|
||||
addq %rax,%r11
|
||||
movq -8(%rcx,%r15,8),%rax
|
||||
adcq $0,%rdx
|
||||
addq -8(%rsp,%r15,8),%r11
|
||||
adcq $0,%rdx
|
||||
movq %rdx,%r10
|
||||
|
||||
mulq %rbp
|
||||
addq %rax,%rdi
|
||||
movq (%rsi,%r15,8),%rax
|
||||
adcq $0,%rdx
|
||||
addq %r11,%rdi
|
||||
adcq $0,%rdx
|
||||
movq %r13,-24(%rsp,%r15,8)
|
||||
movq %rdx,%r13
|
||||
|
||||
mulq %rbx
|
||||
addq %rax,%r10
|
||||
movq (%rcx,%r15,8),%rax
|
||||
adcq $0,%rdx
|
||||
addq (%rsp,%r15,8),%r10
|
||||
adcq $0,%rdx
|
||||
movq %rdx,%r11
|
||||
|
||||
mulq %rbp
|
||||
addq %rax,%r13
|
||||
movq 8(%rsi,%r15,8),%rax
|
||||
adcq $0,%rdx
|
||||
addq %r10,%r13
|
||||
adcq $0,%rdx
|
||||
movq %rdi,-16(%rsp,%r15,8)
|
||||
movq %rdx,%rdi
|
||||
|
||||
mulq %rbx
|
||||
addq %rax,%r11
|
||||
movq 8(%rcx,%r15,8),%rax
|
||||
adcq $0,%rdx
|
||||
addq 8(%rsp,%r15,8),%r11
|
||||
adcq $0,%rdx
|
||||
leaq 4(%r15),%r15
|
||||
movq %rdx,%r10
|
||||
|
||||
mulq %rbp
|
||||
addq %rax,%rdi
|
||||
movq -16(%rsi,%r15,8),%rax
|
||||
adcq $0,%rdx
|
||||
addq %r11,%rdi
|
||||
adcq $0,%rdx
|
||||
movq %r13,-40(%rsp,%r15,8)
|
||||
movq %rdx,%r13
|
||||
cmpq %r9,%r15
|
||||
jl .Linner4x
|
||||
|
||||
# Tail of the pass; the outer index is advanced here.
mulq %rbx
|
||||
addq %rax,%r10
|
||||
movq -16(%rcx,%r15,8),%rax
|
||||
adcq $0,%rdx
|
||||
addq -16(%rsp,%r15,8),%r10
|
||||
adcq $0,%rdx
|
||||
movq %rdx,%r11
|
||||
|
||||
mulq %rbp
|
||||
addq %rax,%r13
|
||||
movq -8(%rsi,%r15,8),%rax
|
||||
adcq $0,%rdx
|
||||
addq %r10,%r13
|
||||
adcq $0,%rdx
|
||||
movq %rdi,-32(%rsp,%r15,8)
|
||||
movq %rdx,%rdi
|
||||
|
||||
mulq %rbx
|
||||
addq %rax,%r11
|
||||
movq -8(%rcx,%r15,8),%rax
|
||||
adcq $0,%rdx
|
||||
addq -8(%rsp,%r15,8),%r11
|
||||
adcq $0,%rdx
|
||||
leaq 1(%r14),%r14
|
||||
movq %rdx,%r10
|
||||
|
||||
mulq %rbp
|
||||
addq %rax,%rdi
|
||||
movq (%rsi),%rax
|
||||
adcq $0,%rdx
|
||||
addq %r11,%rdi
|
||||
adcq $0,%rdx
|
||||
movq %r13,-24(%rsp,%r15,8)
|
||||
movq %rdx,%r13
|
||||
|
||||
# Hand-encoded: movq %xmm0,%rbx  (multiplier word for the next pass)
.byte 102,72,15,126,195
|
||||
movq %rdi,-16(%rsp,%r15,8)
|
||||
|
||||
xorq %rdi,%rdi
|
||||
addq %r10,%r13
|
||||
adcq $0,%rdi
|
||||
addq (%rsp,%r9,8),%r13
|
||||
adcq $0,%rdi
|
||||
movq %r13,-8(%rsp,%r15,8)
|
||||
movq %rdi,(%rsp,%r15,8)
|
||||
|
||||
cmpq %r9,%r14
|
||||
jl .Louter4x
|
||||
# Reload the result pointer parked at tp[num+2]; %xmm0 = 0 is used to wipe
# tp[] during the copy; %r9 becomes num/4 for the 4-word loops below.
movq 16(%rsp,%r9,8),%rdi
|
||||
movq 0(%rsp),%rax
|
||||
pxor %xmm0,%xmm0
|
||||
movq 8(%rsp),%rdx
|
||||
shrq $2,%r9
|
||||
leaq (%rsp),%rsi
|
||||
xorq %r14,%r14
|
||||
|
||||
# result = tp - n, four words per iteration; the borrow chain starts with subq
# and continues through sbbq (the movq/leaq/decq in between leave CF intact).
subq 0(%rcx),%rax
|
||||
movq 16(%rsi),%rbx
|
||||
movq 24(%rsi),%rbp
|
||||
sbbq 8(%rcx),%rdx
|
||||
leaq -1(%r9),%r15
|
||||
jmp .Lsub4x
|
||||
.align 16
|
||||
.Lsub4x:
|
||||
movq %rax,0(%rdi,%r14,8)
|
||||
movq %rdx,8(%rdi,%r14,8)
|
||||
sbbq 16(%rcx,%r14,8),%rbx
|
||||
movq 32(%rsi,%r14,8),%rax
|
||||
movq 40(%rsi,%r14,8),%rdx
|
||||
sbbq 24(%rcx,%r14,8),%rbp
|
||||
movq %rbx,16(%rdi,%r14,8)
|
||||
movq %rbp,24(%rdi,%r14,8)
|
||||
sbbq 32(%rcx,%r14,8),%rax
|
||||
movq 48(%rsi,%r14,8),%rbx
|
||||
movq 56(%rsi,%r14,8),%rbp
|
||||
sbbq 40(%rcx,%r14,8),%rdx
|
||||
leaq 4(%r14),%r14
|
||||
decq %r15
|
||||
jnz .Lsub4x
|
||||
|
||||
movq %rax,0(%rdi,%r14,8)
|
||||
movq 32(%rsi,%r14,8),%rax
|
||||
sbbq 16(%rcx,%r14,8),%rbx
|
||||
movq %rdx,8(%rdi,%r14,8)
|
||||
sbbq 24(%rcx,%r14,8),%rbp
|
||||
movq %rbx,16(%rdi,%r14,8)
|
||||
|
||||
# %rax = tp[num] - borrow is used as a select mask:
# %rsi = (tp address & mask) | (result address & ~mask), i.e. the copy source.
sbbq $0,%rax
|
||||
movq %rbp,24(%rdi,%r14,8)
|
||||
xorq %r14,%r14
|
||||
andq %rax,%rsi
|
||||
notq %rax
|
||||
movq %rdi,%rcx
|
||||
andq %rax,%rcx
|
||||
leaq -1(%r9),%r15
|
||||
orq %rcx,%rsi
|
||||
|
||||
# Copy 16 bytes at a time from the selected source to the result, storing
# zeros (%xmm0) over tp[] as it goes. %r14 is a byte offset in this loop.
# The movdqa stores to (%rsp) rely on the 1024-byte alignment set in the prologue.
movdqu (%rsi),%xmm1
|
||||
movdqa %xmm0,(%rsp)
|
||||
movdqu %xmm1,(%rdi)
|
||||
jmp .Lcopy4x
|
||||
.align 16
|
||||
.Lcopy4x:
|
||||
movdqu 16(%rsi,%r14,1),%xmm2
|
||||
movdqu 32(%rsi,%r14,1),%xmm1
|
||||
movdqa %xmm0,16(%rsp,%r14,1)
|
||||
movdqu %xmm2,16(%rdi,%r14,1)
|
||||
movdqa %xmm0,32(%rsp,%r14,1)
|
||||
movdqu %xmm1,32(%rdi,%r14,1)
|
||||
leaq 32(%r14),%r14
|
||||
decq %r15
|
||||
jnz .Lcopy4x
|
||||
|
||||
# Restore %r9 = num so the saved %rsp at tp[num+1] can be located.
shlq $2,%r9
|
||||
movdqu 16(%rsi,%r14,1),%xmm2
|
||||
movdqa %xmm0,16(%rsp,%r14,1)
|
||||
movdqu %xmm2,16(%rdi,%r14,1)
|
||||
# Return 1 and restore the six registers pushed in the prologue.
movq 8(%rsp,%r9,8),%rsi
|
||||
movq $1,%rax
|
||||
movq (%rsi),%r15
|
||||
movq 8(%rsi),%r14
|
||||
movq 16(%rsi),%r13
|
||||
movq 24(%rsi),%r12
|
||||
movq 32(%rsi),%rbp
|
||||
movq 40(%rsi),%rbx
|
||||
leaq 48(%rsi),%rsp
|
||||
.Lmul4x_epilogue:
|
||||
# Hand-encoded: rep ret
.byte 0xf3,0xc3
|
||||
.size bn_mul4x_mont_gather5,.-bn_mul4x_mont_gather5
|
||||
# bn_scatter5: store a vector of qwords into one column of a table.
# In:  %rdi = source qwords, %rsi = number of qwords, %rdx = table base,
#      %rcx = column index (scaled by 8 bytes).
# Word i of the source is written to table + 8*index + 256*i.
# Returns immediately when %rsi is zero. Writes %rax, %rdx, %rdi, %rsi.
.globl bn_scatter5
|
||||
.type bn_scatter5,@function
|
||||
.align 16
|
||||
bn_scatter5:
|
||||
cmpq $0,%rsi
|
||||
jz .Lscatter_epilogue
|
||||
leaq (%rdx,%rcx,8),%rdx
|
||||
.Lscatter:
|
||||
movq (%rdi),%rax
|
||||
leaq 8(%rdi),%rdi
|
||||
movq %rax,(%rdx)
|
||||
# Consecutive words of one column are 256 bytes apart.
leaq 256(%rdx),%rdx
|
||||
subq $1,%rsi
|
||||
jnz .Lscatter
|
||||
.Lscatter_epilogue:
|
||||
# Hand-encoded: rep ret
.byte 0xf3,0xc3
|
||||
.size bn_scatter5,.-bn_scatter5
|
||||
|
||||
# bn_gather5: read one column of a table back into a contiguous vector.
# In:  %rdi = destination qwords, %rsi = number of qwords, %rdx = table base,
#      %rcx = column index.
# For every output word four candidates are loaded (64 bytes apart, at
# -96/-32/32/96 from table + 96 + 8*(index & 7)), each is ANDed with one of
# %xmm4..%xmm7 and the results are ORed, so the kept word is selected by mask
# rather than by load address. The masks are four consecutive qwords of
# .Lmagic_masks starting at index ~(index >> 3) & 3.
# NOTE(review): there is no zero check on %rsi; the loop body runs before the
# count is tested, so callers presumably always pass a non-zero count -- confirm.
.globl bn_gather5
|
||||
.type bn_gather5,@function
|
||||
.align 16
|
||||
bn_gather5:
|
||||
movq %rcx,%r11
|
||||
shrq $3,%rcx
|
||||
andq $7,%r11
|
||||
notq %rcx
|
||||
leaq .Lmagic_masks(%rip),%rax
|
||||
andq $3,%rcx
|
||||
leaq 96(%rdx,%r11,8),%rdx
|
||||
movq 0(%rax,%rcx,8),%xmm4
|
||||
movq 8(%rax,%rcx,8),%xmm5
|
||||
movq 16(%rax,%rcx,8),%xmm6
|
||||
movq 24(%rax,%rcx,8),%xmm7
|
||||
jmp .Lgather
|
||||
.align 16
|
||||
.Lgather:
|
||||
movq -96(%rdx),%xmm0
|
||||
movq -32(%rdx),%xmm1
|
||||
pand %xmm4,%xmm0
|
||||
movq 32(%rdx),%xmm2
|
||||
pand %xmm5,%xmm1
|
||||
movq 96(%rdx),%xmm3
|
||||
pand %xmm6,%xmm2
|
||||
por %xmm1,%xmm0
|
||||
pand %xmm7,%xmm3
|
||||
por %xmm2,%xmm0
|
||||
# Advance to the next word of the column (256-byte stride).
leaq 256(%rdx),%rdx
|
||||
por %xmm3,%xmm0
|
||||
|
||||
movq %xmm0,(%rdi)
|
||||
leaq 8(%rdi),%rdi
|
||||
subq $1,%rsi
|
||||
jnz .Lgather
|
||||
# Hand-encoded: rep ret
.byte 0xf3,0xc3
|
||||
.LSEH_end_bn_gather5:
|
||||
.size bn_gather5,.-bn_gather5
|
||||
# .Lmagic_masks: eight qwords (written as pairs of .long), all zero except
# qword 3, which is all ones. The gather code in this file reads four
# consecutive qwords starting at an index in 0..3, so exactly one of the four
# masks it loads is all ones.
.align 64
|
||||
.Lmagic_masks:
|
||||
.long 0,0, 0,0, 0,0, -1,-1
|
||||
.long 0,0, 0,0, 0,0, 0,0
|
||||
# NUL-terminated ASCII identification string: "Montgomery Multiplication
# with scatter/gather for x86_64, CRYPTOGAMS by <appro@openssl.org>"
.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115,99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
295
deps/openssl/asm/x64-macosx-gas/bn/x86_64-gf2m.s
vendored
Normal file
295
deps/openssl/asm/x64-macosx-gas/bn/x86_64-gf2m.s
vendored
Normal file
@ -0,0 +1,295 @@
|
||||
.text
|
||||
|
||||
|
||||
|
||||
# _mul_1x1 (Mach-O build): 64x64 -> 128-bit carry-less (XOR-accumulated)
# multiply helper.
# In:    %rax = a, %rbp = b, %r8 = nibble mask (the caller in this file passes 15)
# Out:   %rax = low 64 bits of the product, %rdx = high 64 bits
# Stack: reserves 128+8 bytes and builds a 16-entry qword table at 0..120(%rsp)
# Writes (not preserved here): %rbx, %rcx, %rsi, %rdi, %rbp, %r9-%r14, %xmm0, %xmm1
.p2align 4
|
||||
_mul_1x1:
|
||||
subq $128+8,%rsp
|
||||
# %r9 = a with its top three bits cleared; %r10 = 2*%r9, %r11 = 4*%r9, %r12 = 8*a.
# The top three bits of a are handled separately: sarq $63 turns bits 63/62/61
# of a into all-ones/zero masks that are ANDed with b and shifted into place,
# seeding the %rax (low) / %rdx (high) accumulators.
movq $-1,%r9
|
||||
leaq (%rax,%rax,1),%rsi
|
||||
shrq $3,%r9
|
||||
leaq (,%rax,4),%rdi
|
||||
andq %rax,%r9
|
||||
leaq (,%rax,8),%r12
|
||||
sarq $63,%rax
|
||||
leaq (%r9,%r9,1),%r10
|
||||
sarq $63,%rsi
|
||||
leaq (,%r9,4),%r11
|
||||
andq %rbp,%rax
|
||||
sarq $63,%rdi
|
||||
movq %rax,%rdx
|
||||
shlq $63,%rax
|
||||
andq %rbp,%rsi
|
||||
shrq $1,%rdx
|
||||
movq %rsi,%rcx
|
||||
shlq $62,%rsi
|
||||
andq %rbp,%rdi
|
||||
shrq $2,%rcx
|
||||
xorq %rsi,%rax
|
||||
movq %rdi,%rbx
|
||||
shlq $61,%rdi
|
||||
xorq %rcx,%rdx
|
||||
shrq $3,%rbx
|
||||
xorq %rdi,%rax
|
||||
xorq %rbx,%rdx
|
||||
|
||||
# Build the table: entry i, stored at 8*i(%rsp), is the XOR of the 1x/2x/4x/8x
# multiples selected by the bits of i (entry 0 is zero).
movq %r9,%r13
|
||||
movq $0,0(%rsp)
|
||||
xorq %r10,%r13
|
||||
movq %r9,8(%rsp)
|
||||
movq %r11,%r14
|
||||
movq %r10,16(%rsp)
|
||||
xorq %r12,%r14
|
||||
movq %r13,24(%rsp)
|
||||
|
||||
xorq %r11,%r9
|
||||
movq %r11,32(%rsp)
|
||||
xorq %r11,%r10
|
||||
movq %r9,40(%rsp)
|
||||
xorq %r11,%r13
|
||||
movq %r10,48(%rsp)
|
||||
xorq %r14,%r9
|
||||
movq %r13,56(%rsp)
|
||||
xorq %r14,%r10
|
||||
|
||||
movq %r12,64(%rsp)
|
||||
xorq %r14,%r13
|
||||
movq %r9,72(%rsp)
|
||||
xorq %r11,%r9
|
||||
movq %r10,80(%rsp)
|
||||
xorq %r11,%r10
|
||||
movq %r13,88(%rsp)
|
||||
|
||||
xorq %r11,%r13
|
||||
movq %r14,96(%rsp)
|
||||
movq %r8,%rsi
|
||||
movq %r9,104(%rsp)
|
||||
andq %rbp,%rsi
|
||||
movq %r10,112(%rsp)
|
||||
shrq $4,%rbp
|
||||
movq %r13,120(%rsp)
|
||||
movq %r8,%rdi
|
||||
andq %rbp,%rdi
|
||||
shrq $4,%rbp
|
||||
|
||||
# Walk b four bits at a time (%rbp is shifted right by 4 per step, masked with %r8).
# Table entries for even-numbered steps are accumulated in %xmm0 using byte
# shifts (pslldq); entries for odd-numbered steps are accumulated in
# %rax:%rdx using shlq/shrq pairs whose counts add up to 64.
movq (%rsp,%rsi,8),%xmm0
|
||||
movq %r8,%rsi
|
||||
andq %rbp,%rsi
|
||||
shrq $4,%rbp
|
||||
movq (%rsp,%rdi,8),%rcx
|
||||
movq %r8,%rdi
|
||||
movq %rcx,%rbx
|
||||
shlq $4,%rcx
|
||||
andq %rbp,%rdi
|
||||
movq (%rsp,%rsi,8),%xmm1
|
||||
shrq $60,%rbx
|
||||
xorq %rcx,%rax
|
||||
pslldq $1,%xmm1
|
||||
movq %r8,%rsi
|
||||
shrq $4,%rbp
|
||||
xorq %rbx,%rdx
|
||||
andq %rbp,%rsi
|
||||
shrq $4,%rbp
|
||||
pxor %xmm1,%xmm0
|
||||
movq (%rsp,%rdi,8),%rcx
|
||||
movq %r8,%rdi
|
||||
movq %rcx,%rbx
|
||||
shlq $12,%rcx
|
||||
andq %rbp,%rdi
|
||||
movq (%rsp,%rsi,8),%xmm1
|
||||
shrq $52,%rbx
|
||||
xorq %rcx,%rax
|
||||
pslldq $2,%xmm1
|
||||
movq %r8,%rsi
|
||||
shrq $4,%rbp
|
||||
xorq %rbx,%rdx
|
||||
andq %rbp,%rsi
|
||||
shrq $4,%rbp
|
||||
pxor %xmm1,%xmm0
|
||||
movq (%rsp,%rdi,8),%rcx
|
||||
movq %r8,%rdi
|
||||
movq %rcx,%rbx
|
||||
shlq $20,%rcx
|
||||
andq %rbp,%rdi
|
||||
movq (%rsp,%rsi,8),%xmm1
|
||||
shrq $44,%rbx
|
||||
xorq %rcx,%rax
|
||||
pslldq $3,%xmm1
|
||||
movq %r8,%rsi
|
||||
shrq $4,%rbp
|
||||
xorq %rbx,%rdx
|
||||
andq %rbp,%rsi
|
||||
shrq $4,%rbp
|
||||
pxor %xmm1,%xmm0
|
||||
movq (%rsp,%rdi,8),%rcx
|
||||
movq %r8,%rdi
|
||||
movq %rcx,%rbx
|
||||
shlq $28,%rcx
|
||||
andq %rbp,%rdi
|
||||
movq (%rsp,%rsi,8),%xmm1
|
||||
shrq $36,%rbx
|
||||
xorq %rcx,%rax
|
||||
pslldq $4,%xmm1
|
||||
movq %r8,%rsi
|
||||
shrq $4,%rbp
|
||||
xorq %rbx,%rdx
|
||||
andq %rbp,%rsi
|
||||
shrq $4,%rbp
|
||||
pxor %xmm1,%xmm0
|
||||
movq (%rsp,%rdi,8),%rcx
|
||||
movq %r8,%rdi
|
||||
movq %rcx,%rbx
|
||||
shlq $36,%rcx
|
||||
andq %rbp,%rdi
|
||||
movq (%rsp,%rsi,8),%xmm1
|
||||
shrq $28,%rbx
|
||||
xorq %rcx,%rax
|
||||
pslldq $5,%xmm1
|
||||
movq %r8,%rsi
|
||||
shrq $4,%rbp
|
||||
xorq %rbx,%rdx
|
||||
andq %rbp,%rsi
|
||||
shrq $4,%rbp
|
||||
pxor %xmm1,%xmm0
|
||||
movq (%rsp,%rdi,8),%rcx
|
||||
movq %r8,%rdi
|
||||
movq %rcx,%rbx
|
||||
shlq $44,%rcx
|
||||
andq %rbp,%rdi
|
||||
movq (%rsp,%rsi,8),%xmm1
|
||||
shrq $20,%rbx
|
||||
xorq %rcx,%rax
|
||||
pslldq $6,%xmm1
|
||||
movq %r8,%rsi
|
||||
shrq $4,%rbp
|
||||
xorq %rbx,%rdx
|
||||
andq %rbp,%rsi
|
||||
shrq $4,%rbp
|
||||
pxor %xmm1,%xmm0
|
||||
movq (%rsp,%rdi,8),%rcx
|
||||
movq %r8,%rdi
|
||||
movq %rcx,%rbx
|
||||
shlq $52,%rcx
|
||||
andq %rbp,%rdi
|
||||
movq (%rsp,%rsi,8),%xmm1
|
||||
shrq $12,%rbx
|
||||
xorq %rcx,%rax
|
||||
pslldq $7,%xmm1
|
||||
movq %r8,%rsi
|
||||
shrq $4,%rbp
|
||||
xorq %rbx,%rdx
|
||||
andq %rbp,%rsi
|
||||
shrq $4,%rbp
|
||||
pxor %xmm1,%xmm0
|
||||
movq (%rsp,%rdi,8),%rcx
|
||||
movq %rcx,%rbx
|
||||
shlq $60,%rcx
|
||||
# Hand-encoded: movq %xmm0,%rsi (low half of the SSE accumulator)
.byte 102,72,15,126,198
|
||||
shrq $4,%rbx
|
||||
xorq %rcx,%rax
|
||||
psrldq $8,%xmm0
|
||||
xorq %rbx,%rdx
|
||||
# Hand-encoded: movq %xmm0,%rdi (high half, after the 8-byte right shift)
.byte 102,72,15,126,199
|
||||
# Fold the SSE accumulator into the integer result.
xorq %rsi,%rax
|
||||
xorq %rdi,%rdx
|
||||
|
||||
addq $128+8,%rsp
|
||||
# Hand-encoded: rep ret
.byte 0xf3,0xc3
|
||||
L$end_mul_1x1:
|
||||
|
||||
|
||||
# _bn_GF2m_mul_2x2 (Mach-O build): 128x128 -> 256-bit carry-less multiply.
# In:  %rdi = pointer to four result qwords,
#      %rsi and %rdx = the two qwords of the first operand (%rsi is the high one:
#      its product lands in result words 2..3), %rcx and %r8 = the two qwords of
#      the second operand, in the same high/low order.
# Out: result stored at 0..31(%rdi).
# Two implementations are selected by bit 33 of _OPENSSL_ia32cap_P
# (presumably the PCLMULQDQ capability flag -- confirm against the definition
# of OPENSSL_ia32cap_P): bit set -> hand-encoded pclmulqdq path,
# bit clear -> three calls to _mul_1x1.
.globl _bn_GF2m_mul_2x2
|
||||
|
||||
.p2align 4
|
||||
_bn_GF2m_mul_2x2:
|
||||
movq _OPENSSL_ia32cap_P(%rip),%rax
|
||||
btq $33,%rax
|
||||
jnc L$vanilla_mul_2x2
|
||||
|
||||
# Hand-encoded: movq %rsi,%xmm0 / movq %rcx,%xmm1 / movq %rdx,%xmm2 / movq %r8,%xmm3
.byte 102,72,15,110,198
|
||||
.byte 102,72,15,110,201
|
||||
.byte 102,72,15,110,210
|
||||
.byte 102,73,15,110,216
|
||||
movdqa %xmm0,%xmm4
|
||||
movdqa %xmm1,%xmm5
|
||||
# Hand-encoded: pclmulqdq $0,%xmm1,%xmm0   (high * high)
.byte 102,15,58,68,193,0
|
||||
pxor %xmm2,%xmm4
|
||||
pxor %xmm3,%xmm5
|
||||
# Hand-encoded: pclmulqdq $0,%xmm3,%xmm2   (low * low)
.byte 102,15,58,68,211,0
|
||||
# Hand-encoded: pclmulqdq $0,%xmm5,%xmm4   ((high^low) * (high^low))
.byte 102,15,58,68,229,0
|
||||
# Middle term = xmm4 ^ xmm0 ^ xmm2; its halves are folded into the low and
# high products before both are stored.
xorps %xmm0,%xmm4
|
||||
xorps %xmm2,%xmm4
|
||||
movdqa %xmm4,%xmm5
|
||||
pslldq $8,%xmm4
|
||||
psrldq $8,%xmm5
|
||||
pxor %xmm4,%xmm2
|
||||
pxor %xmm5,%xmm0
|
||||
movdqu %xmm2,0(%rdi)
|
||||
movdqu %xmm0,16(%rdi)
|
||||
# Hand-encoded: rep ret
.byte 0xf3,0xc3
|
||||
|
||||
.p2align 4
|
||||
L$vanilla_mul_2x2:
|
||||
# Frame layout (136 bytes): 0..31 partial products, 32..71 saved arguments,
# 80..119 saved %r14/%r13/%r12/%rbp/%rbx (all of which _mul_1x1 overwrites).
leaq -136(%rsp),%rsp
|
||||
movq %r14,80(%rsp)
|
||||
movq %r13,88(%rsp)
|
||||
movq %r12,96(%rsp)
|
||||
movq %rbp,104(%rsp)
|
||||
movq %rbx,112(%rsp)
|
||||
L$body_mul_2x2:
|
||||
movq %rdi,32(%rsp)
|
||||
movq %rsi,40(%rsp)
|
||||
movq %rdx,48(%rsp)
|
||||
movq %rcx,56(%rsp)
|
||||
movq %r8,64(%rsp)
|
||||
|
||||
# First product: %rsi * %rcx, with %r8 = 15 as the nibble mask for _mul_1x1.
movq $15,%r8
|
||||
movq %rsi,%rax
|
||||
movq %rcx,%rbp
|
||||
call _mul_1x1
|
||||
|
||||
movq %rax,16(%rsp)
|
||||
movq %rdx,24(%rsp)
|
||||
|
||||
# Second product: original %rdx * original %r8.
movq 48(%rsp),%rax
|
||||
movq 64(%rsp),%rbp
|
||||
call _mul_1x1
|
||||
|
||||
movq %rax,0(%rsp)
|
||||
movq %rdx,8(%rsp)
|
||||
|
||||
# Third product: (XOR of the first operand's words) * (XOR of the second's).
movq 40(%rsp),%rax
|
||||
movq 56(%rsp),%rbp
|
||||
xorq 48(%rsp),%rax
|
||||
xorq 64(%rsp),%rbp
|
||||
call _mul_1x1
|
||||
|
||||
movq 0(%rsp),%rbx
|
||||
movq 8(%rsp),%rcx
|
||||
movq 16(%rsp),%rdi
|
||||
movq 24(%rsp),%rsi
|
||||
# %rbp = result pointer saved from %rdi on entry.
movq 32(%rsp),%rbp
|
||||
|
||||
# Recombine the three partial products with XORs and store the four result words.
xorq %rdx,%rax
|
||||
xorq %rcx,%rdx
|
||||
xorq %rbx,%rax
|
||||
movq %rbx,0(%rbp)
|
||||
xorq %rdi,%rdx
|
||||
movq %rsi,24(%rbp)
|
||||
xorq %rsi,%rax
|
||||
xorq %rsi,%rdx
|
||||
xorq %rdx,%rax
|
||||
movq %rdx,16(%rbp)
|
||||
movq %rax,8(%rbp)
|
||||
|
||||
# Restore the registers saved on entry and release the frame.
movq 80(%rsp),%r14
|
||||
movq 88(%rsp),%r13
|
||||
movq 96(%rsp),%r12
|
||||
movq 104(%rsp),%rbp
|
||||
movq 112(%rsp),%rbx
|
||||
leaq 136(%rsp),%rsp
|
||||
# Hand-encoded: rep ret
.byte 0xf3,0xc3
|
||||
L$end_mul_2x2:
|
||||
|
||||
# NUL-terminated ASCII identification string:
# "GF(2^m) Multiplication for x86_64, CRYPTOGAMS by <appro@openssl.org>"
.byte 71,70,40,50,94,109,41,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
.p2align 4
|
785
deps/openssl/asm/x64-macosx-gas/bn/x86_64-mont5.s
vendored
Normal file
785
deps/openssl/asm/x64-macosx-gas/bn/x86_64-mont5.s
vendored
Normal file
@ -0,0 +1,785 @@
|
||||
.text
|
||||
|
||||
|
||||
## bn_mul_mont_gather5 -- exported entry point (Mach-O name carries a leading underscore).
## ABI: System V AMD64. Register use visible in this routine:
##   %rdi = result words, %rsi = multiplicand words, %rdx = gather-table base,
##   %rcx = modulus words, %r8 = pointer to n0, %r9d = word count (num),
##   8(%rsp) = 7th argument, used as the table index.
## NOTE(review): the C prototype is not visible in this file; the names above
## follow conventional Montgomery-multiplication notation -- confirm against the caller.
.globl _bn_mul_mont_gather5
|
||||
|
||||
.p2align 6
|
||||
_bn_mul_mont_gather5:
|
||||
## Dispatch: if num is not a multiple of 4, or num < 8, take the generic
## word-at-a-time path; otherwise jump to the 4x-unrolled implementation.
testl $3,%r9d
|
||||
jnz L$mul_enter
|
||||
cmpl $8,%r9d
|
||||
jb L$mul_enter
|
||||
jmp L$mul4x_enter
|
||||
|
||||
## L$mul_enter: prologue of the generic path. Saves the callee-saved registers
## and carves a (num+2)-qword scratch vector out of the stack.
.p2align 4
|
||||
L$mul_enter:
|
||||
## Zero-extend num to 64 bits so it can be used as an index register.
movl %r9d,%r9d
|
||||
## Fetch the 7th argument before the pushes below move %rsp.
movl 8(%rsp),%r10d
|
||||
pushq %rbx
|
||||
pushq %rbp
|
||||
pushq %r12
|
||||
pushq %r13
|
||||
pushq %r14
|
||||
pushq %r15
|
||||
## %rax remembers the post-push stack pointer for the epilogue.
movq %rsp,%rax
|
||||
## %rsp -= 8*(num+2), then round down to a 1024-byte boundary.
leaq 2(%r9),%r11
|
||||
negq %r11
|
||||
leaq (%rsp,%r11,8),%rsp
|
||||
andq $-1024,%rsp
|
||||
|
||||
## Stash the saved stack pointer in scratch slot num+1.
movq %rax,8(%rsp,%r9,8)
|
||||
## L$mul_body: build the gather masks, fetch the first gathered word into %rbx,
## and compute the first column of the first pass.
## Registers from here on: %rbx = current gathered word (b[i]), %rbp = m1
## (reduction multiplier), %r14 = i (outer index), %r15 = j (inner index),
## %r10/%r11 = low/high of the a[j]*b[i] chain, %r13 = high of the np[j]*m1 chain.
L$mul_body:
|
||||
## %r12 = table base; split the index into %r11 = index & 7 and %r10 = index >> 3.
movq %rdx,%r12
|
||||
movq %r10,%r11
|
||||
shrq $3,%r10
|
||||
andq $7,%r11
|
||||
## %r10 = ~(index>>3) & 3 picks a 4-qword window of L$magic_masks so that
## exactly one of %xmm4..%xmm7 gets an all-ones low quadword.
notq %r10
|
||||
leaq L$magic_masks(%rip),%rax
|
||||
andq $3,%r10
|
||||
## Bias %r12 by 96 + 8*(index&7) so the four candidates sit at -96/-32/+32/+96.
leaq 96(%r12,%r11,8),%r12
|
||||
movq 0(%rax,%r10,8),%xmm4
|
||||
movq 8(%rax,%r10,8),%xmm5
|
||||
movq 16(%rax,%r10,8),%xmm6
|
||||
movq 24(%rax,%r10,8),%xmm7
|
||||
|
||||
## Gather: load four candidates 64 bytes apart, mask each, OR them together.
## %xmm0 ends up holding the one selected quadword; %r12 advances 256 bytes.
movq -96(%r12),%xmm0
|
||||
movq -32(%r12),%xmm1
|
||||
pand %xmm4,%xmm0
|
||||
movq 32(%r12),%xmm2
|
||||
pand %xmm5,%xmm1
|
||||
movq 96(%r12),%xmm3
|
||||
pand %xmm6,%xmm2
|
||||
por %xmm1,%xmm0
|
||||
pand %xmm7,%xmm3
|
||||
por %xmm2,%xmm0
|
||||
leaq 256(%r12),%r12
|
||||
por %xmm3,%xmm0
|
||||
|
||||
## Hand-encoded "movq %xmm0,%rbx": %rbx = first gathered word.
.byte 102,72,15,126,195
|
||||
|
||||
## %r8 = n0 value (dereferenced once), %rax = a[0].
movq (%r8),%r8
|
||||
movq (%rsi),%rax
|
||||
|
||||
## i = 0, j = 0.
xorq %r14,%r14
|
||||
xorq %r15,%r15
|
||||
|
||||
## Start gathering the next word; the SSE work is interleaved with the multiplies.
movq -96(%r12),%xmm0
|
||||
movq -32(%r12),%xmm1
|
||||
pand %xmm4,%xmm0
|
||||
movq 32(%r12),%xmm2
|
||||
pand %xmm5,%xmm1
|
||||
|
||||
## %rdx:%rax = a[0]*b[0]; keep the low word in %r10, then load np[0].
movq %r8,%rbp
|
||||
mulq %rbx
|
||||
movq %rax,%r10
|
||||
movq (%rcx),%rax
|
||||
|
||||
movq 96(%r12),%xmm3
|
||||
pand %xmm6,%xmm2
|
||||
por %xmm1,%xmm0
|
||||
pand %xmm7,%xmm3
|
||||
|
||||
## m1 = low(a[0]*b[0]) * n0 (mod 2^64); %r11 = high word of a[0]*b[0].
imulq %r10,%rbp
|
||||
movq %rdx,%r11
|
||||
|
||||
por %xmm2,%xmm0
|
||||
leaq 256(%r12),%r12
|
||||
por %xmm3,%xmm0
|
||||
|
||||
## np[0]*m1: the low-word sum in %r10 is never stored, only its carry is kept.
mulq %rbp
|
||||
addq %rax,%r10
|
||||
movq 8(%rsi),%rax
|
||||
adcq $0,%rdx
|
||||
movq %rdx,%r13
|
||||
|
||||
## j = 1; enter the first-pass loop at its multiply step.
leaq 1(%r15),%r15
|
||||
jmp L$1st_enter
|
||||
|
||||
## L$1st: first-pass inner loop (one word per iteration).
## %rbx = current gathered word, %rbp = m1, %r15 = j, %r9 = num,
## %rsi = multiplicand words, %rcx = modulus words, scratch vector tp[] at (%rsp).
## Each iteration stores tp[j-1] = low(np[j]*m1 + a[j]*b + carries).
.p2align 4
|
||||
L$1st:
|
||||
addq %rax,%r13
|
||||
movq (%rsi,%r15,8),%rax
|
||||
adcq $0,%rdx
|
||||
addq %r11,%r13
|
||||
movq %r10,%r11
|
||||
adcq $0,%rdx
|
||||
movq %r13,-16(%rsp,%r15,8)
|
||||
movq %rdx,%r13
|
||||
|
||||
L$1st_enter:
|
||||
## a[j]*b: add the low word to %r11, keep the high word in %r10; load np[j].
mulq %rbx
|
||||
addq %rax,%r11
|
||||
movq (%rcx,%r15,8),%rax
|
||||
adcq $0,%rdx
|
||||
leaq 1(%r15),%r15
|
||||
movq %rdx,%r10
|
||||
|
||||
## np[j]*m1; loop until j == num.
mulq %rbp
|
||||
cmpq %r9,%r15
|
||||
jne L$1st
|
||||
|
||||
## Hand-encoded "movq %xmm0,%rbx": %rbx = next gathered word.
.byte 102,72,15,126,195
|
||||
|
||||
## Final column of the pass; %rax is reloaded with a[0] for the next pass.
addq %rax,%r13
|
||||
movq (%rsi),%rax
|
||||
adcq $0,%rdx
|
||||
addq %r11,%r13
|
||||
adcq $0,%rdx
|
||||
movq %r13,-16(%rsp,%r15,8)
|
||||
movq %rdx,%r13
|
||||
movq %r10,%r11
|
||||
|
||||
## Fold the two remaining high words: tp[num-1] = sum, tp[num] = carry out.
xorq %rdx,%rdx
|
||||
addq %r11,%r13
|
||||
adcq $0,%rdx
|
||||
movq %r13,-8(%rsp,%r9,8)
|
||||
movq %rdx,(%rsp,%r9,8)
|
||||
|
||||
## i = 1; fall into the outer loop.
leaq 1(%r14),%r14
|
||||
jmp L$outer
|
||||
## L$outer: head of one outer iteration (i = %r14). On entry %rbx holds the
## current gathered word and %rax holds a[0]. Computes column 0 with the
## previous tp[0] added in, derives m1 in %rbp, and gathers the next word.
.p2align 4
|
||||
L$outer:
|
||||
## j = 0, %rbp = n0, %r10 = tp[0].
xorq %r15,%r15
|
||||
movq %r8,%rbp
|
||||
movq (%rsp),%r10
|
||||
|
||||
## Masked gather of the next table word (masks %xmm4..%xmm7, base %r12).
movq -96(%r12),%xmm0
|
||||
movq -32(%r12),%xmm1
|
||||
pand %xmm4,%xmm0
|
||||
movq 32(%r12),%xmm2
|
||||
pand %xmm5,%xmm1
|
||||
|
||||
## %r10 = low(a[0]*b + tp[0]); then load np[0].
mulq %rbx
|
||||
addq %rax,%r10
|
||||
movq (%rcx),%rax
|
||||
adcq $0,%rdx
|
||||
|
||||
movq 96(%r12),%xmm3
|
||||
pand %xmm6,%xmm2
|
||||
por %xmm1,%xmm0
|
||||
pand %xmm7,%xmm3
|
||||
|
||||
## m1 = %r10 * n0 (mod 2^64); %r11 = high word of the a[0]*b column.
imulq %r10,%rbp
|
||||
movq %rdx,%r11
|
||||
|
||||
por %xmm2,%xmm0
|
||||
leaq 256(%r12),%r12
|
||||
por %xmm3,%xmm0
|
||||
|
||||
## np[0]*m1: the low-word sum is not stored; %r10 is then reloaded with tp[1].
mulq %rbp
|
||||
addq %rax,%r10
|
||||
movq 8(%rsi),%rax
|
||||
adcq $0,%rdx
|
||||
movq 8(%rsp),%r10
|
||||
movq %rdx,%r13
|
||||
|
||||
## j = 1; enter the inner loop at its multiply step.
leaq 1(%r15),%r15
|
||||
jmp L$inner_enter
|
||||
|
||||
## L$inner: inner loop of passes 1..num-1 (one word per iteration).
## %rbx = current gathered word, %rbp = m1, %r15 = j, %r14 = i, %r9 = num,
## %r10 = tp[j] being accumulated, scratch vector tp[] at (%rsp).
## Each iteration stores tp[j-1] = low(np[j]*m1 + a[j]*b + tp[j] + carries).
.p2align 4
|
||||
L$inner:
|
||||
addq %rax,%r13
|
||||
movq (%rsi,%r15,8),%rax
|
||||
adcq $0,%rdx
|
||||
addq %r10,%r13
|
||||
movq (%rsp,%r15,8),%r10
|
||||
adcq $0,%rdx
|
||||
movq %r13,-16(%rsp,%r15,8)
|
||||
movq %rdx,%r13
|
||||
|
||||
L$inner_enter:
|
||||
## a[j]*b added to the running high word and to tp[j]; carry goes to %r11.
mulq %rbx
|
||||
addq %rax,%r11
|
||||
movq (%rcx,%r15,8),%rax
|
||||
adcq $0,%rdx
|
||||
addq %r11,%r10
|
||||
movq %rdx,%r11
|
||||
adcq $0,%r11
|
||||
leaq 1(%r15),%r15
|
||||
|
||||
## np[j]*m1; loop until j == num.
mulq %rbp
|
||||
cmpq %r9,%r15
|
||||
jne L$inner
|
||||
|
||||
## Hand-encoded "movq %xmm0,%rbx": %rbx = next gathered word.
.byte 102,72,15,126,195
|
||||
|
||||
## Final column; %rax is reloaded with a[0], %r10 with the old top word tp[num].
addq %rax,%r13
|
||||
movq (%rsi),%rax
|
||||
adcq $0,%rdx
|
||||
addq %r10,%r13
|
||||
movq (%rsp,%r15,8),%r10
|
||||
adcq $0,%rdx
|
||||
movq %r13,-16(%rsp,%r15,8)
|
||||
movq %rdx,%r13
|
||||
|
||||
## tp[num-1] = high words + old tp[num]; tp[num] = carry out.
xorq %rdx,%rdx
|
||||
addq %r11,%r13
|
||||
adcq $0,%rdx
|
||||
addq %r10,%r13
|
||||
adcq $0,%rdx
|
||||
movq %r13,-8(%rsp,%r9,8)
|
||||
movq %rdx,(%rsp,%r9,8)
|
||||
|
||||
## i++; repeat the outer loop while i < num.
leaq 1(%r14),%r14
|
||||
cmpq %r9,%r14
|
||||
jl L$outer
|
||||
|
||||
## Set up the final subtraction: i = 0 (the xor also clears CF for the sbb chain),
## %rax = tp[0], %rsi = tp, %r15 = num.
xorq %r14,%r14
|
||||
movq (%rsp),%rax
|
||||
leaq (%rsp),%rsi
|
||||
movq %r9,%r15
|
||||
jmp L$sub
|
||||
## L$sub: rp[i] = tp[i] - np[i] with borrow, for i = 0..num-1.
## %rsi = tp (scratch vector), %rcx = modulus words, %rdi = result words,
## %r14 = i, %r15 = remaining count. CF must be clear on first entry; the
## borrow is carried between iterations because lea and dec leave CF unchanged.
.p2align 4
|
||||
L$sub: sbbq (%rcx,%r14,8),%rax
|
||||
movq %rax,(%rdi,%r14,8)
|
||||
movq 8(%rsi,%r14,8),%rax
|
||||
leaq 1(%r14),%r14
|
||||
decq %r15
|
||||
jnz L$sub
|
||||
|
||||
## %rax = tp[num] - borrow: all-ones when the subtraction underflowed, else zero.
sbbq $0,%rax
|
||||
xorq %r14,%r14
|
||||
## Branch-free select of the copy source:
## %rsi = tp when %rax is all-ones, otherwise %rsi = rp (the difference).
andq %rax,%rsi
|
||||
notq %rax
|
||||
movq %rdi,%rcx
|
||||
andq %rax,%rcx
|
||||
movq %r9,%r15
|
||||
orq %rcx,%rsi
|
||||
## L$copy: copy num words from the selected source (%rsi) to the result (%rdi)
## and overwrite each scratch word at (%rsp) with its index, wiping the
## intermediate values. %r14 = i, %r15 = remaining count.
.p2align 4
|
||||
L$copy:
|
||||
movq (%rsi,%r14,8),%rax
|
||||
movq %r14,(%rsp,%r14,8)
|
||||
movq %rax,(%rdi,%r14,8)
|
||||
leaq 1(%r14),%r14
|
||||
subq $1,%r15
|
||||
jnz L$copy
|
||||
|
||||
## Epilogue: reload the saved stack pointer from scratch slot num+1, return 1
## in %rax, restore the six callee-saved registers pushed by the prologue.
movq 8(%rsp,%r9,8),%rsi
|
||||
movq $1,%rax
|
||||
movq (%rsi),%r15
|
||||
movq 8(%rsi),%r14
|
||||
movq 16(%rsi),%r13
|
||||
movq 24(%rsi),%r12
|
||||
movq 32(%rsi),%rbp
|
||||
movq 40(%rsi),%rbx
|
||||
leaq 48(%rsi),%rsp
|
||||
L$mul_epilogue:
|
||||
## Hand-encoded "rep ret".
.byte 0xf3,0xc3
|
||||
|
||||
|
||||
## bn_mul4x_mont_gather5 -- 4x-unrolled variant, reached through L$mul4x_enter
## with the same argument registers as the exported entry point
## (%rdi, %rsi, %rdx, %rcx, %r8, %r9d, 7th argument at 8(%rsp)).
## The symbol has no .globl directive in this file.
.p2align 4
|
||||
bn_mul4x_mont_gather5:
|
||||
L$mul4x_enter:
|
||||
## Zero-extend num; fetch the 7th argument before the pushes move %rsp.
movl %r9d,%r9d
|
||||
movl 8(%rsp),%r10d
|
||||
pushq %rbx
|
||||
pushq %rbp
|
||||
pushq %r12
|
||||
pushq %r13
|
||||
pushq %r14
|
||||
pushq %r15
|
||||
## %rax remembers the post-push stack pointer for the epilogue.
movq %rsp,%rax
|
||||
## %rsp -= 8*(num+4), then round down to a 1024-byte boundary.
leaq 4(%r9),%r11
|
||||
negq %r11
|
||||
leaq (%rsp,%r11,8),%rsp
|
||||
andq $-1024,%rsp
|
||||
|
||||
## Stash the saved stack pointer in scratch slot num+1.
movq %rax,8(%rsp,%r9,8)
|
||||
## L$mul4x_body: save the result pointer, build the gather masks, fetch the
## first gathered word into %rbx and compute the first two columns of pass 0.
## Registers: %rbx = current gathered word, %rbp = m1, %r14 = i, %r15 = j,
## %r10/%r11 alternate as high words of the a[j]*b chain, %r13/%rdi alternate
## as high words of the np[j]*m1 chain (which is why %rdi is spilled first).
L$mul4x_body:
|
||||
## Scratch slot num+2 keeps the result pointer while %rdi is used as a temporary.
movq %rdi,16(%rsp,%r9,8)
|
||||
## %r12 = table base; %r11 = index & 7, %r10 = ~(index >> 3) & 3.
movq %rdx,%r12
|
||||
movq %r10,%r11
|
||||
shrq $3,%r10
|
||||
andq $7,%r11
|
||||
notq %r10
|
||||
leaq L$magic_masks(%rip),%rax
|
||||
andq $3,%r10
|
||||
## Bias %r12 so the four candidates sit at -96/-32/+32/+96; load the four masks.
leaq 96(%r12,%r11,8),%r12
|
||||
movq 0(%rax,%r10,8),%xmm4
|
||||
movq 8(%rax,%r10,8),%xmm5
|
||||
movq 16(%rax,%r10,8),%xmm6
|
||||
movq 24(%rax,%r10,8),%xmm7
|
||||
|
||||
## Masked gather of the first table word into %xmm0; %r12 advances 256 bytes.
movq -96(%r12),%xmm0
|
||||
movq -32(%r12),%xmm1
|
||||
pand %xmm4,%xmm0
|
||||
movq 32(%r12),%xmm2
|
||||
pand %xmm5,%xmm1
|
||||
movq 96(%r12),%xmm3
|
||||
pand %xmm6,%xmm2
|
||||
por %xmm1,%xmm0
|
||||
pand %xmm7,%xmm3
|
||||
por %xmm2,%xmm0
|
||||
leaq 256(%r12),%r12
|
||||
por %xmm3,%xmm0
|
||||
|
||||
## Hand-encoded "movq %xmm0,%rbx": %rbx = first gathered word.
.byte 102,72,15,126,195
|
||||
## %r8 = n0 value, %rax = a[0].
movq (%r8),%r8
|
||||
movq (%rsi),%rax
|
||||
|
||||
## i = 0, j = 0.
xorq %r14,%r14
|
||||
xorq %r15,%r15
|
||||
|
||||
## Start gathering the next word, interleaved with the multiplies below.
movq -96(%r12),%xmm0
|
||||
movq -32(%r12),%xmm1
|
||||
pand %xmm4,%xmm0
|
||||
movq 32(%r12),%xmm2
|
||||
pand %xmm5,%xmm1
|
||||
|
||||
## a[0]*b[0]: low word to %r10; then load np[0].
movq %r8,%rbp
|
||||
mulq %rbx
|
||||
movq %rax,%r10
|
||||
movq (%rcx),%rax
|
||||
|
||||
movq 96(%r12),%xmm3
|
||||
pand %xmm6,%xmm2
|
||||
por %xmm1,%xmm0
|
||||
pand %xmm7,%xmm3
|
||||
|
||||
## m1 = low(a[0]*b[0]) * n0 (mod 2^64); %r11 = high word.
imulq %r10,%rbp
|
||||
movq %rdx,%r11
|
||||
|
||||
por %xmm2,%xmm0
|
||||
leaq 256(%r12),%r12
|
||||
por %xmm3,%xmm0
|
||||
|
||||
## np[0]*m1: the low-word sum is not stored, only its carry is kept.
mulq %rbp
|
||||
addq %rax,%r10
|
||||
movq 8(%rsi),%rax
|
||||
adcq $0,%rdx
|
||||
movq %rdx,%rdi
|
||||
|
||||
## Column 1: a[1]*b[0] ...
mulq %rbx
|
||||
addq %rax,%r11
|
||||
movq 8(%rcx),%rax
|
||||
adcq $0,%rdx
|
||||
movq %rdx,%r10
|
||||
|
||||
## ... plus np[1]*m1, stored as tp[0]; j becomes 4 for the unrolled loop.
mulq %rbp
|
||||
addq %rax,%rdi
|
||||
movq 16(%rsi),%rax
|
||||
adcq $0,%rdx
|
||||
addq %r11,%rdi
|
||||
leaq 4(%r15),%r15
|
||||
adcq $0,%rdx
|
||||
movq %rdi,(%rsp)
|
||||
movq %rdx,%r13
|
||||
jmp L$1st4x
|
||||
## L$1st4x: first pass, four columns per iteration.
## %rbx = current gathered word, %rbp = m1, %r15 = j (advances by 4), %r9 = num,
## %rsi = multiplicand words, %rcx = modulus words, scratch vector tp[] at (%rsp).
## High words alternate between %r10/%r11 (a[j]*b chain) and %r13/%rdi
## (np[j]*m1 chain); each "mulq %rbx / mulq %rbp" pair produces one tp word.
.p2align 4
|
||||
L$1st4x:
|
||||
mulq %rbx
|
||||
addq %rax,%r10
|
||||
movq -16(%rcx,%r15,8),%rax
|
||||
adcq $0,%rdx
|
||||
movq %rdx,%r11
|
||||
|
||||
mulq %rbp
|
||||
addq %rax,%r13
|
||||
movq -8(%rsi,%r15,8),%rax
|
||||
adcq $0,%rdx
|
||||
addq %r10,%r13
|
||||
adcq $0,%rdx
|
||||
movq %r13,-24(%rsp,%r15,8)
|
||||
movq %rdx,%rdi
|
||||
|
||||
mulq %rbx
|
||||
addq %rax,%r11
|
||||
movq -8(%rcx,%r15,8),%rax
|
||||
adcq $0,%rdx
|
||||
movq %rdx,%r10
|
||||
|
||||
mulq %rbp
|
||||
addq %rax,%rdi
|
||||
movq (%rsi,%r15,8),%rax
|
||||
adcq $0,%rdx
|
||||
addq %r11,%rdi
|
||||
adcq $0,%rdx
|
||||
movq %rdi,-16(%rsp,%r15,8)
|
||||
movq %rdx,%r13
|
||||
|
||||
mulq %rbx
|
||||
addq %rax,%r10
|
||||
movq (%rcx,%r15,8),%rax
|
||||
adcq $0,%rdx
|
||||
movq %rdx,%r11
|
||||
|
||||
mulq %rbp
|
||||
addq %rax,%r13
|
||||
movq 8(%rsi,%r15,8),%rax
|
||||
adcq $0,%rdx
|
||||
addq %r10,%r13
|
||||
adcq $0,%rdx
|
||||
movq %r13,-8(%rsp,%r15,8)
|
||||
movq %rdx,%rdi
|
||||
|
||||
## Fourth column of the group; j += 4 happens mid-column, so the offsets
## below are relative to the updated j.
mulq %rbx
|
||||
addq %rax,%r11
|
||||
movq 8(%rcx,%r15,8),%rax
|
||||
adcq $0,%rdx
|
||||
leaq 4(%r15),%r15
|
||||
movq %rdx,%r10
|
||||
|
||||
mulq %rbp
|
||||
addq %rax,%rdi
|
||||
movq -16(%rsi,%r15,8),%rax
|
||||
adcq $0,%rdx
|
||||
addq %r11,%rdi
|
||||
adcq $0,%rdx
|
||||
movq %rdi,-32(%rsp,%r15,8)
|
||||
movq %rdx,%r13
|
||||
cmpq %r9,%r15
|
||||
jl L$1st4x
|
||||
|
||||
## Tail: the last two columns of the pass (j == num here).
mulq %rbx
|
||||
addq %rax,%r10
|
||||
movq -16(%rcx,%r15,8),%rax
|
||||
adcq $0,%rdx
|
||||
movq %rdx,%r11
|
||||
|
||||
mulq %rbp
|
||||
addq %rax,%r13
|
||||
movq -8(%rsi,%r15,8),%rax
|
||||
adcq $0,%rdx
|
||||
addq %r10,%r13
|
||||
adcq $0,%rdx
|
||||
movq %r13,-24(%rsp,%r15,8)
|
||||
movq %rdx,%rdi
|
||||
|
||||
mulq %rbx
|
||||
addq %rax,%r11
|
||||
movq -8(%rcx,%r15,8),%rax
|
||||
adcq $0,%rdx
|
||||
movq %rdx,%r10
|
||||
|
||||
## %rax is reloaded with a[0] for the next pass.
mulq %rbp
|
||||
addq %rax,%rdi
|
||||
movq (%rsi),%rax
|
||||
adcq $0,%rdx
|
||||
addq %r11,%rdi
|
||||
adcq $0,%rdx
|
||||
movq %rdi,-16(%rsp,%r15,8)
|
||||
movq %rdx,%r13
|
||||
|
||||
## Hand-encoded "movq %xmm0,%rbx": %rbx = next gathered word.
.byte 102,72,15,126,195
|
||||
|
||||
## tp[num-1] = sum of the two remaining high words, tp[num] = carry out.
xorq %rdi,%rdi
|
||||
addq %r10,%r13
|
||||
adcq $0,%rdi
|
||||
movq %r13,-8(%rsp,%r15,8)
|
||||
movq %rdi,(%rsp,%r15,8)
|
||||
|
||||
## i = 1.
leaq 1(%r14),%r14
|
||||
## L$outer4x: head of one outer iteration of the 4x path (i = %r14).
## On entry %rbx holds the current gathered word and %rax holds a[0].
## Computes columns 0 and 1 with the previous tp[] words added in, derives m1
## in %rbp, and gathers the next table word into %xmm0.
.p2align 2
|
||||
L$outer4x:
|
||||
## j = 0.
xorq %r15,%r15
|
||||
movq -96(%r12),%xmm0
|
||||
movq -32(%r12),%xmm1
|
||||
pand %xmm4,%xmm0
|
||||
movq 32(%r12),%xmm2
|
||||
pand %xmm5,%xmm1
|
||||
|
||||
## %r10 = low(a[0]*b + tp[0]); then load np[0].
movq (%rsp),%r10
|
||||
movq %r8,%rbp
|
||||
mulq %rbx
|
||||
addq %rax,%r10
|
||||
movq (%rcx),%rax
|
||||
adcq $0,%rdx
|
||||
|
||||
movq 96(%r12),%xmm3
|
||||
pand %xmm6,%xmm2
|
||||
por %xmm1,%xmm0
|
||||
pand %xmm7,%xmm3
|
||||
|
||||
## m1 = %r10 * n0 (mod 2^64); %r11 = high word of column 0.
imulq %r10,%rbp
|
||||
movq %rdx,%r11
|
||||
|
||||
por %xmm2,%xmm0
|
||||
leaq 256(%r12),%r12
|
||||
por %xmm3,%xmm0
|
||||
|
||||
## np[0]*m1: the low-word sum is not stored, only its carry is kept.
mulq %rbp
|
||||
addq %rax,%r10
|
||||
movq 8(%rsi),%rax
|
||||
adcq $0,%rdx
|
||||
movq %rdx,%rdi
|
||||
|
||||
## Column 1: a[1]*b + tp[1].
mulq %rbx
|
||||
addq %rax,%r11
|
||||
movq 8(%rcx),%rax
|
||||
adcq $0,%rdx
|
||||
addq 8(%rsp),%r11
|
||||
adcq $0,%rdx
|
||||
movq %rdx,%r10
|
||||
|
||||
## Column 1 reduction term; the result stays in %rdi and is stored by the
## inner loop. j becomes 4.
mulq %rbp
|
||||
addq %rax,%rdi
|
||||
movq 16(%rsi),%rax
|
||||
adcq $0,%rdx
|
||||
addq %r11,%rdi
|
||||
leaq 4(%r15),%r15
|
||||
adcq $0,%rdx
|
||||
movq %rdx,%r13
|
||||
jmp L$inner4x
|
||||
## L$inner4x: inner loop of passes 1..num-1, four columns per iteration.
## %rbx = current gathered word, %rbp = m1, %r15 = j (advances by 4), %r14 = i,
## %r9 = num, scratch vector tp[] at (%rsp). Same structure as the first pass,
## except each a[j]*b column also adds the existing tp word, and the store of
## each finished word is delayed by one column.
.p2align 4
|
||||
L$inner4x:
|
||||
mulq %rbx
|
||||
addq %rax,%r10
|
||||
movq -16(%rcx,%r15,8),%rax
|
||||
adcq $0,%rdx
|
||||
addq -16(%rsp,%r15,8),%r10
|
||||
adcq $0,%rdx
|
||||
movq %rdx,%r11
|
||||
|
||||
mulq %rbp
|
||||
addq %rax,%r13
|
||||
movq -8(%rsi,%r15,8),%rax
|
||||
adcq $0,%rdx
|
||||
addq %r10,%r13
|
||||
adcq $0,%rdx
|
||||
movq %rdi,-32(%rsp,%r15,8)
|
||||
movq %rdx,%rdi
|
||||
|
||||
mulq %rbx
|
||||
addq %rax,%r11
|
||||
movq -8(%rcx,%r15,8),%rax
|
||||
adcq $0,%rdx
|
||||
addq -8(%rsp,%r15,8),%r11
|
||||
adcq $0,%rdx
|
||||
movq %rdx,%r10
|
||||
|
||||
mulq %rbp
|
||||
addq %rax,%rdi
|
||||
movq (%rsi,%r15,8),%rax
|
||||
adcq $0,%rdx
|
||||
addq %r11,%rdi
|
||||
adcq $0,%rdx
|
||||
movq %r13,-24(%rsp,%r15,8)
|
||||
movq %rdx,%r13
|
||||
|
||||
mulq %rbx
|
||||
addq %rax,%r10
|
||||
movq (%rcx,%r15,8),%rax
|
||||
adcq $0,%rdx
|
||||
addq (%rsp,%r15,8),%r10
|
||||
adcq $0,%rdx
|
||||
movq %rdx,%r11
|
||||
|
||||
mulq %rbp
|
||||
addq %rax,%r13
|
||||
movq 8(%rsi,%r15,8),%rax
|
||||
adcq $0,%rdx
|
||||
addq %r10,%r13
|
||||
adcq $0,%rdx
|
||||
movq %rdi,-16(%rsp,%r15,8)
|
||||
movq %rdx,%rdi
|
||||
|
||||
## Fourth column of the group; j += 4 happens mid-column, so the offsets
## below are relative to the updated j.
mulq %rbx
|
||||
addq %rax,%r11
|
||||
movq 8(%rcx,%r15,8),%rax
|
||||
adcq $0,%rdx
|
||||
addq 8(%rsp,%r15,8),%r11
|
||||
adcq $0,%rdx
|
||||
leaq 4(%r15),%r15
|
||||
movq %rdx,%r10
|
||||
|
||||
mulq %rbp
|
||||
addq %rax,%rdi
|
||||
movq -16(%rsi,%r15,8),%rax
|
||||
adcq $0,%rdx
|
||||
addq %r11,%rdi
|
||||
adcq $0,%rdx
|
||||
movq %r13,-40(%rsp,%r15,8)
|
||||
movq %rdx,%r13
|
||||
cmpq %r9,%r15
|
||||
jl L$inner4x
|
||||
|
||||
## Tail: the last two columns of the pass (j == num here).
mulq %rbx
|
||||
addq %rax,%r10
|
||||
movq -16(%rcx,%r15,8),%rax
|
||||
adcq $0,%rdx
|
||||
addq -16(%rsp,%r15,8),%r10
|
||||
adcq $0,%rdx
|
||||
movq %rdx,%r11
|
||||
|
||||
mulq %rbp
|
||||
addq %rax,%r13
|
||||
movq -8(%rsi,%r15,8),%rax
|
||||
adcq $0,%rdx
|
||||
addq %r10,%r13
|
||||
adcq $0,%rdx
|
||||
movq %rdi,-32(%rsp,%r15,8)
|
||||
movq %rdx,%rdi
|
||||
|
||||
## i++ is folded into the last column.
mulq %rbx
|
||||
addq %rax,%r11
|
||||
movq -8(%rcx,%r15,8),%rax
|
||||
adcq $0,%rdx
|
||||
addq -8(%rsp,%r15,8),%r11
|
||||
adcq $0,%rdx
|
||||
leaq 1(%r14),%r14
|
||||
movq %rdx,%r10
|
||||
|
||||
## %rax is reloaded with a[0] for the next pass.
mulq %rbp
|
||||
addq %rax,%rdi
|
||||
movq (%rsi),%rax
|
||||
adcq $0,%rdx
|
||||
addq %r11,%rdi
|
||||
adcq $0,%rdx
|
||||
movq %r13,-24(%rsp,%r15,8)
|
||||
movq %rdx,%r13
|
||||
|
||||
## Hand-encoded "movq %xmm0,%rbx": %rbx = next gathered word.
.byte 102,72,15,126,195
|
||||
movq %rdi,-16(%rsp,%r15,8)
|
||||
|
||||
## tp[num-1] = remaining high words + old tp[num]; tp[num] = carry out.
xorq %rdi,%rdi
|
||||
addq %r10,%r13
|
||||
adcq $0,%rdi
|
||||
addq (%rsp,%r9,8),%r13
|
||||
adcq $0,%rdi
|
||||
movq %r13,-8(%rsp,%r15,8)
|
||||
movq %rdi,(%rsp,%r15,8)
|
||||
|
||||
## Repeat the outer loop while i < num.
cmpq %r9,%r14
|
||||
jl L$outer4x
|
||||
## Final subtraction of the 4x path: rp[] = tp[] - np[], four words per
## iteration, followed by a branch-free choice of the copy source.
## %rsi = tp (scratch vector at %rsp), %rcx = modulus words, %rdi = result
## words, %r14 = word index, %r15 = loop count.
## Reload the result pointer from scratch slot num+2.
movq 16(%rsp,%r9,8),%rdi
|
||||
movq 0(%rsp),%rax
|
||||
## %xmm0 = 0, used later to overwrite the scratch vector.
pxor %xmm0,%xmm0
|
||||
movq 8(%rsp),%rdx
|
||||
## %r9 = num/4 (restored by a matching shift before the epilogue).
shrq $2,%r9
|
||||
leaq (%rsp),%rsi
|
||||
xorq %r14,%r14
|
||||
|
||||
## The first word uses sub, which starts the borrow chain; every later word
## uses sbb. The mov/lea/dec instructions in between leave CF unchanged.
subq 0(%rcx),%rax
|
||||
movq 16(%rsi),%rbx
|
||||
movq 24(%rsi),%rbp
|
||||
sbbq 8(%rcx),%rdx
|
||||
## Loop count = num/4 - 1; the last group is handled after the loop.
leaq -1(%r9),%r15
|
||||
jmp L$sub4x
|
||||
.p2align 4
|
||||
L$sub4x:
|
||||
movq %rax,0(%rdi,%r14,8)
|
||||
movq %rdx,8(%rdi,%r14,8)
|
||||
sbbq 16(%rcx,%r14,8),%rbx
|
||||
movq 32(%rsi,%r14,8),%rax
|
||||
movq 40(%rsi,%r14,8),%rdx
|
||||
sbbq 24(%rcx,%r14,8),%rbp
|
||||
movq %rbx,16(%rdi,%r14,8)
|
||||
movq %rbp,24(%rdi,%r14,8)
|
||||
sbbq 32(%rcx,%r14,8),%rax
|
||||
movq 48(%rsi,%r14,8),%rbx
|
||||
movq 56(%rsi,%r14,8),%rbp
|
||||
sbbq 40(%rcx,%r14,8),%rdx
|
||||
leaq 4(%r14),%r14
|
||||
decq %r15
|
||||
jnz L$sub4x
|
||||
|
||||
## Last group; %rax is then reloaded with the top overflow word tp[num].
movq %rax,0(%rdi,%r14,8)
|
||||
movq 32(%rsi,%r14,8),%rax
|
||||
sbbq 16(%rcx,%r14,8),%rbx
|
||||
movq %rdx,8(%rdi,%r14,8)
|
||||
sbbq 24(%rcx,%r14,8),%rbp
|
||||
movq %rbx,16(%rdi,%r14,8)
|
||||
|
||||
## %rax = tp[num] - borrow: all-ones when the subtraction underflowed, else zero.
sbbq $0,%rax
|
||||
movq %rbp,24(%rdi,%r14,8)
|
||||
xorq %r14,%r14
|
||||
## %rsi = tp when %rax is all-ones, otherwise %rsi = rp (the difference).
andq %rax,%rsi
|
||||
notq %rax
|
||||
movq %rdi,%rcx
|
||||
andq %rax,%rcx
|
||||
leaq -1(%r9),%r15
|
||||
orq %rcx,%rsi
|
||||
|
||||
## Copy the first 16 bytes and zero the matching scratch words.
movdqu (%rsi),%xmm1
|
||||
movdqa %xmm0,(%rsp)
|
||||
movdqu %xmm1,(%rdi)
|
||||
jmp L$copy4x
|
||||
## L$copy4x: copy the selected source (%rsi) to the result (%rdi) 32 bytes per
## iteration while overwriting the scratch vector at (%rsp) with zeros from
## %xmm0. %r14 is a byte offset here; %r15 = num/4 - 1 iterations.
.p2align 4
|
||||
L$copy4x:
|
||||
movdqu 16(%rsi,%r14,1),%xmm2
|
||||
movdqu 32(%rsi,%r14,1),%xmm1
|
||||
movdqa %xmm0,16(%rsp,%r14,1)
|
||||
movdqu %xmm2,16(%rdi,%r14,1)
|
||||
movdqa %xmm0,32(%rsp,%r14,1)
|
||||
movdqu %xmm1,32(%rdi,%r14,1)
|
||||
leaq 32(%r14),%r14
|
||||
decq %r15
|
||||
jnz L$copy4x
|
||||
|
||||
## Restore %r9 = num (undoing the earlier shift by 2) and copy the last 16 bytes.
shlq $2,%r9
|
||||
movdqu 16(%rsi,%r14,1),%xmm2
|
||||
movdqa %xmm0,16(%rsp,%r14,1)
|
||||
movdqu %xmm2,16(%rdi,%r14,1)
|
||||
## Epilogue: reload the saved stack pointer from scratch slot num+1, return 1
## in %rax, restore the six callee-saved registers pushed by the prologue.
movq 8(%rsp,%r9,8),%rsi
|
||||
movq $1,%rax
|
||||
movq (%rsi),%r15
|
||||
movq 8(%rsi),%r14
|
||||
movq 16(%rsi),%r13
|
||||
movq 24(%rsi),%r12
|
||||
movq 32(%rsi),%rbp
|
||||
movq 40(%rsi),%rbx
|
||||
leaq 48(%rsi),%rsp
|
||||
L$mul4x_epilogue:
|
||||
## Hand-encoded "rep ret".
.byte 0xf3,0xc3
|
||||
|
||||
## bn_scatter5 -- store a vector into the gather table with a 256-byte stride.
## ABI: System V AMD64.
## In:  %rdi = source words, %rsi = word count, %rdx = table base,
##      %rcx = slot index (scaled by 8 bytes).
## Effect: for k = 0..count-1, table[8*index + 256*k] = src[k].
## Overwrites %rax, %rdx, %rdi, %rsi and flags; no stack use.
.globl _bn_scatter5
|
||||
|
||||
.p2align 4
|
||||
_bn_scatter5:
|
||||
## Nothing to do for a zero count.
cmpq $0,%rsi
|
||||
jz L$scatter_epilogue
|
||||
## %rdx = address of the first slot for this index.
leaq (%rdx,%rcx,8),%rdx
|
||||
L$scatter:
|
||||
movq (%rdi),%rax
|
||||
leaq 8(%rdi),%rdi
|
||||
movq %rax,(%rdx)
|
||||
## Consecutive words of one vector are 256 bytes apart in the table.
leaq 256(%rdx),%rdx
|
||||
subq $1,%rsi
|
||||
jnz L$scatter
|
||||
L$scatter_epilogue:
|
||||
## Hand-encoded "rep ret".
.byte 0xf3,0xc3
|
||||
|
||||
|
||||
## bn_gather5 -- read one vector back out of the gather table.
## ABI: System V AMD64.
## In:  %rdi = destination words, %rsi = word count, %rdx = table base,
##      %rcx = slot index.
## For every output word it loads four candidates 64 bytes apart and keeps one
## of them by AND/OR masking, so the loads issued do not depend on index >> 3.
## Overwrites %rax, %rcx, %rdx, %rdi, %rsi, %r11, %xmm0-%xmm7 and flags.
## NOTE(review): the loop tests the count only after the first store, so a
## zero count is not handled here -- confirm callers never pass 0.
.globl _bn_gather5
|
||||
|
||||
.p2align 4
|
||||
_bn_gather5:
|
||||
## %r11 = index & 7, %rcx = ~(index >> 3) & 3.
movq %rcx,%r11
|
||||
shrq $3,%rcx
|
||||
andq $7,%r11
|
||||
notq %rcx
|
||||
leaq L$magic_masks(%rip),%rax
|
||||
andq $3,%rcx
|
||||
## Bias %rdx by 96 + 8*(index&7) so the four candidates sit at -96/-32/+32/+96.
leaq 96(%rdx,%r11,8),%rdx
|
||||
## Load a 4-qword window of the mask table: exactly one of %xmm4..%xmm7 gets
## an all-ones low quadword, the others get zero.
movq 0(%rax,%rcx,8),%xmm4
|
||||
movq 8(%rax,%rcx,8),%xmm5
|
||||
movq 16(%rax,%rcx,8),%xmm6
|
||||
movq 24(%rax,%rcx,8),%xmm7
|
||||
jmp L$gather
|
||||
.p2align 4
|
||||
L$gather:
|
||||
movq -96(%rdx),%xmm0
|
||||
movq -32(%rdx),%xmm1
|
||||
pand %xmm4,%xmm0
|
||||
movq 32(%rdx),%xmm2
|
||||
pand %xmm5,%xmm1
|
||||
movq 96(%rdx),%xmm3
|
||||
pand %xmm6,%xmm2
|
||||
por %xmm1,%xmm0
|
||||
pand %xmm7,%xmm3
|
||||
por %xmm2,%xmm0
|
||||
## Next word of the vector is 256 bytes further on.
leaq 256(%rdx),%rdx
|
||||
por %xmm3,%xmm0
|
||||
|
||||
## Store the selected word and advance the destination.
movq %xmm0,(%rdi)
|
||||
leaq 8(%rdi),%rdi
|
||||
subq $1,%rsi
|
||||
jnz L$gather
|
||||
## Hand-encoded "rep ret".
.byte 0xf3,0xc3
|
||||
L$SEH_end_bn_gather5:
|
||||
|
||||
## L$magic_masks: eight quadwords, all zero except the fourth (index 3), which
## is all-ones. The gather code loads a 4-qword window starting at quadword
## 0..3, so exactly one of its four mask registers receives the all-ones value.
## The table is 64-byte aligned.
.p2align 6
|
||||
L$magic_masks:
|
||||
.long 0,0, 0,0, 0,0, -1,-1
|
||||
.long 0,0, 0,0, 0,0, 0,0
|
||||
## NUL-terminated ASCII banner: "Montgomery Multiplication with scatter/gather
## for x86_64, CRYPTOGAMS by <appro@openssl.org>".
.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115,99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
404
deps/openssl/asm/x64-win32-masm/bn/x86_64-gf2m.asm
vendored
Normal file
404
deps/openssl/asm/x64-win32-masm/bn/x86_64-gf2m.asm
vendored
Normal file
@ -0,0 +1,404 @@
|
||||
OPTION DOTNAME
|
||||
.text$ SEGMENT ALIGN(64) 'CODE'
|
||||
|
||||
|
||||
ALIGN 16
|
||||
; _mul_1x1 -- private helper: 64x64 -> 128-bit carry-less (XOR-based) multiply.
; Private register convention (not the Win64 ABI):
;   In:  rax = a, rbp = b, r8 = nibble mask (the caller in this file loads 0fh).
;   Out: rdx:rax = product (high:low).
; Uses 136 bytes of stack for a 16-entry lookup table at [rsp].
; Overwrites rcx, rbx, rsi, rdi, rbp, r9-r14, xmm0 and xmm1 without saving
; them; the caller is responsible for preserving whatever it needs.
_mul_1x1 PROC PRIVATE
|
||||
sub rsp,128+8
|
||||
; r9 = a1 = a with its top three bits cleared; rsi/rdi/r12 = a*2, a*4, a*8.
mov r9,-1
|
||||
lea rsi,QWORD PTR[rax*1+rax]
|
||||
shr r9,3
|
||||
lea rdi,QWORD PTR[rax*4]
|
||||
and r9,rax
|
||||
lea r12,QWORD PTR[rax*8]
|
||||
; The sar-by-63 instructions turn bits 63, 62 and 61 of a into all-ones/zero
; masks; each mask selects b, which is then shifted by 63, 62 or 61 across the
; rdx:rax pair. This handles the three bits that were cleared out of a1.
sar rax,63
|
||||
lea r10,QWORD PTR[r9*1+r9]
|
||||
sar rsi,63
|
||||
lea r11,QWORD PTR[r9*4]
|
||||
and rax,rbp
|
||||
sar rdi,63
|
||||
mov rdx,rax
|
||||
shl rax,63
|
||||
and rsi,rbp
|
||||
shr rdx,1
|
||||
mov rcx,rsi
|
||||
shl rsi,62
|
||||
and rdi,rbp
|
||||
shr rcx,2
|
||||
xor rax,rsi
|
||||
mov rbx,rdi
|
||||
shl rdi,61
|
||||
xor rdx,rcx
|
||||
shr rbx,3
|
||||
xor rax,rdi
|
||||
xor rdx,rbx
|
||||
|
||||
; Build tab[0..15] at [rsp]: tab[i] is the XOR of a1, a1*2, a1*4, a1*8
; (r9, r10, r11, r12 initially) selected by the bits of i.
mov r13,r9
|
||||
mov QWORD PTR[rsp],0
|
||||
xor r13,r10
|
||||
mov QWORD PTR[8+rsp],r9
|
||||
mov r14,r11
|
||||
mov QWORD PTR[16+rsp],r10
|
||||
xor r14,r12
|
||||
mov QWORD PTR[24+rsp],r13
|
||||
|
||||
xor r9,r11
|
||||
mov QWORD PTR[32+rsp],r11
|
||||
xor r10,r11
|
||||
mov QWORD PTR[40+rsp],r9
|
||||
xor r13,r11
|
||||
mov QWORD PTR[48+rsp],r10
|
||||
xor r9,r14
|
||||
mov QWORD PTR[56+rsp],r13
|
||||
xor r10,r14
|
||||
|
||||
mov QWORD PTR[64+rsp],r12
|
||||
xor r13,r14
|
||||
mov QWORD PTR[72+rsp],r9
|
||||
xor r9,r11
|
||||
mov QWORD PTR[80+rsp],r10
|
||||
xor r10,r11
|
||||
mov QWORD PTR[88+rsp],r13
|
||||
|
||||
; The last table stores are interleaved with extracting the first two nibbles
; of b (rsi = nibble 0, rdi = nibble 1).
xor r13,r11
|
||||
mov QWORD PTR[96+rsp],r14
|
||||
mov rsi,r8
|
||||
mov QWORD PTR[104+rsp],r9
|
||||
and rsi,rbp
|
||||
mov QWORD PTR[112+rsp],r10
|
||||
shr rbp,4
|
||||
mov QWORD PTR[120+rsp],r13
|
||||
mov rdi,r8
|
||||
and rdi,rbp
|
||||
shr rbp,4
|
||||
|
||||
; Walk b one nibble at a time. Even nibbles are looked up into xmm1 and
; positioned with a byte shift (pslldq 1..7) before being XORed into xmm0;
; odd nibbles are looked up into rcx and positioned with shl/shr by 4, 12, ...,
; 60 bits before being XORed into rax (low) and rdx (high).
movq xmm0,QWORD PTR[rsi*8+rsp]
|
||||
mov rsi,r8
|
||||
and rsi,rbp
|
||||
shr rbp,4
|
||||
mov rcx,QWORD PTR[rdi*8+rsp]
|
||||
mov rdi,r8
|
||||
mov rbx,rcx
|
||||
shl rcx,4
|
||||
and rdi,rbp
|
||||
movq xmm1,QWORD PTR[rsi*8+rsp]
|
||||
shr rbx,60
|
||||
xor rax,rcx
|
||||
pslldq xmm1,1
|
||||
mov rsi,r8
|
||||
shr rbp,4
|
||||
xor rdx,rbx
|
||||
and rsi,rbp
|
||||
shr rbp,4
|
||||
pxor xmm0,xmm1
|
||||
mov rcx,QWORD PTR[rdi*8+rsp]
|
||||
mov rdi,r8
|
||||
mov rbx,rcx
|
||||
shl rcx,12
|
||||
and rdi,rbp
|
||||
movq xmm1,QWORD PTR[rsi*8+rsp]
|
||||
shr rbx,52
|
||||
xor rax,rcx
|
||||
pslldq xmm1,2
|
||||
mov rsi,r8
|
||||
shr rbp,4
|
||||
xor rdx,rbx
|
||||
and rsi,rbp
|
||||
shr rbp,4
|
||||
pxor xmm0,xmm1
|
||||
mov rcx,QWORD PTR[rdi*8+rsp]
|
||||
mov rdi,r8
|
||||
mov rbx,rcx
|
||||
shl rcx,20
|
||||
and rdi,rbp
|
||||
movq xmm1,QWORD PTR[rsi*8+rsp]
|
||||
shr rbx,44
|
||||
xor rax,rcx
|
||||
pslldq xmm1,3
|
||||
mov rsi,r8
|
||||
shr rbp,4
|
||||
xor rdx,rbx
|
||||
and rsi,rbp
|
||||
shr rbp,4
|
||||
pxor xmm0,xmm1
|
||||
mov rcx,QWORD PTR[rdi*8+rsp]
|
||||
mov rdi,r8
|
||||
mov rbx,rcx
|
||||
shl rcx,28
|
||||
and rdi,rbp
|
||||
movq xmm1,QWORD PTR[rsi*8+rsp]
|
||||
shr rbx,36
|
||||
xor rax,rcx
|
||||
pslldq xmm1,4
|
||||
mov rsi,r8
|
||||
shr rbp,4
|
||||
xor rdx,rbx
|
||||
and rsi,rbp
|
||||
shr rbp,4
|
||||
pxor xmm0,xmm1
|
||||
mov rcx,QWORD PTR[rdi*8+rsp]
|
||||
mov rdi,r8
|
||||
mov rbx,rcx
|
||||
shl rcx,36
|
||||
and rdi,rbp
|
||||
movq xmm1,QWORD PTR[rsi*8+rsp]
|
||||
shr rbx,28
|
||||
xor rax,rcx
|
||||
pslldq xmm1,5
|
||||
mov rsi,r8
|
||||
shr rbp,4
|
||||
xor rdx,rbx
|
||||
and rsi,rbp
|
||||
shr rbp,4
|
||||
pxor xmm0,xmm1
|
||||
mov rcx,QWORD PTR[rdi*8+rsp]
|
||||
mov rdi,r8
|
||||
mov rbx,rcx
|
||||
shl rcx,44
|
||||
and rdi,rbp
|
||||
movq xmm1,QWORD PTR[rsi*8+rsp]
|
||||
shr rbx,20
|
||||
xor rax,rcx
|
||||
pslldq xmm1,6
|
||||
mov rsi,r8
|
||||
shr rbp,4
|
||||
xor rdx,rbx
|
||||
and rsi,rbp
|
||||
shr rbp,4
|
||||
pxor xmm0,xmm1
|
||||
mov rcx,QWORD PTR[rdi*8+rsp]
|
||||
mov rdi,r8
|
||||
mov rbx,rcx
|
||||
shl rcx,52
|
||||
and rdi,rbp
|
||||
movq xmm1,QWORD PTR[rsi*8+rsp]
|
||||
shr rbx,12
|
||||
xor rax,rcx
|
||||
pslldq xmm1,7
|
||||
mov rsi,r8
|
||||
shr rbp,4
|
||||
xor rdx,rbx
|
||||
and rsi,rbp
|
||||
shr rbp,4
|
||||
pxor xmm0,xmm1
|
||||
; Last (16th) nibble, positioned at bit 60.
mov rcx,QWORD PTR[rdi*8+rsp]
|
||||
mov rbx,rcx
|
||||
shl rcx,60
|
||||
; Hand-encoded "movq rsi,xmm0": low quadword of the SSE accumulator.
DB 102,72,15,126,198
|
||||
shr rbx,4
|
||||
xor rax,rcx
|
||||
psrldq xmm0,8
|
||||
xor rdx,rbx
|
||||
; Hand-encoded "movq rdi,xmm0": high quadword of the SSE accumulator.
DB 102,72,15,126,199
|
||||
; Merge the SSE accumulator into the integer result.
xor rax,rsi
|
||||
xor rdx,rdi
|
||||
|
||||
add rsp,128+8
|
||||
DB 0F3h,0C3h ;repret
|
||||
$L$end_mul_1x1::
|
||||
_mul_1x1 ENDP
|
||||
EXTERN OPENSSL_ia32cap_P:NEAR
|
||||
PUBLIC bn_GF2m_mul_2x2
|
||||
|
||||
ALIGN 16
|
||||
; bn_GF2m_mul_2x2 -- 128x128 -> 256-bit carry-less multiply (Win64 ABI).
; In:  rcx = pointer to four result quadwords,
;      rdx = a1 (high word of a), r8 = a0 (low word of a),
;      r9  = b1 (high word of b), [rsp+40] = b0 (5th argument, low word of b).
; Out: result[0..3] written through rcx, least significant quadword first.
; Both paths compute three 64x64 products (a1*b1, a0*b0, (a1^a0)*(b1^b0)) and
; combine them with XORs.
bn_GF2m_mul_2x2 PROC PUBLIC
|
||||
; Bit 33 of OPENSSL_ia32cap_P selects the path that uses the hand-encoded
; pclmulqdq instructions below; when it is clear, use the table-driven path.
mov rax,QWORD PTR[OPENSSL_ia32cap_P]
|
||||
bt rax,33
|
||||
jnc $L$vanilla_mul_2x2
|
||||
|
||||
; Hand-encoded "movq xmm0,rdx", "movq xmm1,r9", "movq xmm2,r8":
; xmm0 = a1, xmm1 = b1, xmm2 = a0; xmm3 = b0 is loaded from the stack.
DB 102,72,15,110,194
|
||||
DB 102,73,15,110,201
|
||||
DB 102,73,15,110,208
|
||||
movq xmm3,QWORD PTR[40+rsp]
|
||||
movdqa xmm4,xmm0
|
||||
movdqa xmm5,xmm1
|
||||
; Hand-encoded "pclmulqdq xmm0,xmm1,0": xmm0 = a1*b1.
DB 102,15,58,68,193,0
|
||||
pxor xmm4,xmm2
|
||||
pxor xmm5,xmm3
|
||||
; Hand-encoded "pclmulqdq xmm2,xmm3,0" and "pclmulqdq xmm4,xmm5,0":
; xmm2 = a0*b0, xmm4 = (a1^a0)*(b1^b0).
DB 102,15,58,68,211,0
|
||||
DB 102,15,58,68,229,0
|
||||
; Middle term = xmm4 ^ high product ^ low product.
xorps xmm4,xmm0
|
||||
xorps xmm4,xmm2
|
||||
; Split the middle term across the low and high 128-bit halves.
movdqa xmm5,xmm4
|
||||
pslldq xmm4,8
|
||||
psrldq xmm5,8
|
||||
pxor xmm2,xmm4
|
||||
pxor xmm0,xmm5
|
||||
movdqu XMMWORD PTR[rcx],xmm2
|
||||
movdqu XMMWORD PTR[16+rcx],xmm0
|
||||
DB 0F3h,0C3h ;repret
|
||||
|
||||
ALIGN 16
|
||||
$L$vanilla_mul_2x2::
|
||||
; Table-driven path. Frame: 136 bytes. The non-volatile registers that
; _mul_1x1 overwrites (rdi, rsi, r12-r14, rbp, rbx) are saved at [80..128+rsp].
lea rsp,QWORD PTR[((-136))+rsp]
|
||||
; 5th argument: [40+rsp] at entry is [176+rsp] after the 136-byte adjustment.
mov r10,QWORD PTR[176+rsp]
|
||||
mov QWORD PTR[120+rsp],rdi
|
||||
mov QWORD PTR[128+rsp],rsi
|
||||
mov QWORD PTR[80+rsp],r14
|
||||
mov QWORD PTR[88+rsp],r13
|
||||
mov QWORD PTR[96+rsp],r12
|
||||
mov QWORD PTR[104+rsp],rbp
|
||||
mov QWORD PTR[112+rsp],rbx
|
||||
$L$body_mul_2x2::
|
||||
; Spill the arguments: [32]=result pointer, [40]=a1, [48]=a0, [56]=b1, [64]=b0.
mov QWORD PTR[32+rsp],rcx
|
||||
mov QWORD PTR[40+rsp],rdx
|
||||
mov QWORD PTR[48+rsp],r8
|
||||
mov QWORD PTR[56+rsp],r9
|
||||
mov QWORD PTR[64+rsp],r10
|
||||
|
||||
; r8 = nibble mask expected by _mul_1x1; first product: a1*b1.
mov r8,0fh
|
||||
mov rax,rdx
|
||||
mov rbp,r9
|
||||
call _mul_1x1
|
||||
|
||||
mov QWORD PTR[16+rsp],rax
|
||||
mov QWORD PTR[24+rsp],rdx
|
||||
|
||||
; Second product: a0*b0.
mov rax,QWORD PTR[48+rsp]
|
||||
mov rbp,QWORD PTR[64+rsp]
|
||||
call _mul_1x1
|
||||
|
||||
mov QWORD PTR[rsp],rax
|
||||
mov QWORD PTR[8+rsp],rdx
|
||||
|
||||
; Third product: (a1^a0)*(b1^b0).
mov rax,QWORD PTR[40+rsp]
|
||||
mov rbp,QWORD PTR[56+rsp]
|
||||
xor rax,QWORD PTR[48+rsp]
|
||||
xor rbp,QWORD PTR[64+rsp]
|
||||
call _mul_1x1
|
||||
|
||||
; Reload: rcx:rbx = a0*b0, rsi:rdi = a1*b1, rbp = result pointer;
; rdx:rax still holds the third product.
mov rbx,QWORD PTR[rsp]
|
||||
mov rcx,QWORD PTR[8+rsp]
|
||||
mov rdi,QWORD PTR[16+rsp]
|
||||
mov rsi,QWORD PTR[24+rsp]
|
||||
mov rbp,QWORD PTR[32+rsp]
|
||||
|
||||
; Combine the three products; result[0] and result[3] are the outer words of
; the low and high products, result[1] and result[2] receive the XOR mix.
xor rax,rdx
|
||||
xor rdx,rcx
|
||||
xor rax,rbx
|
||||
mov QWORD PTR[rbp],rbx
|
||||
xor rdx,rdi
|
||||
mov QWORD PTR[24+rbp],rsi
|
||||
xor rax,rsi
|
||||
xor rdx,rsi
|
||||
xor rax,rdx
|
||||
mov QWORD PTR[16+rbp],rdx
|
||||
mov QWORD PTR[8+rbp],rax
|
||||
|
||||
; Restore the saved registers and release the frame.
mov r14,QWORD PTR[80+rsp]
|
||||
mov r13,QWORD PTR[88+rsp]
|
||||
mov r12,QWORD PTR[96+rsp]
|
||||
mov rbp,QWORD PTR[104+rsp]
|
||||
mov rbx,QWORD PTR[112+rsp]
|
||||
mov rdi,QWORD PTR[120+rsp]
|
||||
mov rsi,QWORD PTR[128+rsp]
|
||||
lea rsp,QWORD PTR[136+rsp]
|
||||
DB 0F3h,0C3h ;repret
|
||||
$L$end_mul_2x2::
|
||||
bn_GF2m_mul_2x2 ENDP
|
||||
DB 71,70,40,50,94,109,41,32,77,117,108,116,105,112,108,105
|
||||
DB 99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54
|
||||
DB 52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
|
||||
DB 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
|
||||
DB 111,114,103,62,0
|
||||
ALIGN 16
|
||||
EXTERN __imp_RtlVirtualUnwind:NEAR
|
||||
|
||||
|
||||
ALIGN 16
|
||||
; se_handler -- Win64 language-specific exception handler registered for the
; table-driven path of bn_GF2m_mul_2x2 (see $L$SEH_info_2x2).
; In (as used below): r8 = CONTEXT record, r9 = DISPATCHER_CONTEXT.
; It rewrites the CONTEXT so that it describes the caller's frame, copies it
; into the dispatcher's context record, calls RtlVirtualUnwind and returns 1.
; NOTE(review): the numeric offsets below are assumed to be the standard
; Win64 CONTEXT / DISPATCHER_CONTEXT field offsets -- confirm against winnt.h.
se_handler PROC PRIVATE
|
||||
push rsi
|
||||
push rdi
|
||||
push rbx
|
||||
push rbp
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
pushfq
|
||||
sub rsp,64
|
||||
|
||||
; rax = context->Rsp (offset 152), rbx = context->Rip (offset 248).
mov rax,QWORD PTR[152+r8]
|
||||
mov rbx,QWORD PTR[248+r8]
|
||||
|
||||
; If the fault is before $L$body_mul_2x2 the register saves may not be
; complete, so only the stack pointer is adjusted.
lea r10,QWORD PTR[$L$body_mul_2x2]
|
||||
cmp rbx,r10
|
||||
jb $L$in_prologue
|
||||
|
||||
; Recover the registers the function body saved at [80..128] in its frame.
mov r14,QWORD PTR[80+rax]
|
||||
mov r13,QWORD PTR[88+rax]
|
||||
mov r12,QWORD PTR[96+rax]
|
||||
mov rbp,QWORD PTR[104+rax]
|
||||
mov rbx,QWORD PTR[112+rax]
|
||||
mov rdi,QWORD PTR[120+rax]
|
||||
mov rsi,QWORD PTR[128+rax]
|
||||
|
||||
; Write them back into the CONTEXT (Rbx, Rbp, Rsi, Rdi, R12, R13, R14).
mov QWORD PTR[144+r8],rbx
|
||||
mov QWORD PTR[160+r8],rbp
|
||||
mov QWORD PTR[168+r8],rsi
|
||||
mov QWORD PTR[176+r8],rdi
|
||||
mov QWORD PTR[216+r8],r12
|
||||
mov QWORD PTR[224+r8],r13
|
||||
mov QWORD PTR[232+r8],r14
|
||||
|
||||
$L$in_prologue::
|
||||
; Undo the 136-byte frame and store the result as context->Rsp.
lea rax,QWORD PTR[136+rax]
|
||||
mov QWORD PTR[152+r8],rax
|
||||
|
||||
; Copy 154 quadwords from the CONTEXT at r8 to the pointer held at [r9+40].
mov rdi,QWORD PTR[40+r9]
|
||||
mov rsi,r8
|
||||
mov ecx,154
|
||||
; Hand-encoded "cld; rep movsq" (bytes FC F3 48 A5).
DD 0a548f3fch
|
||||
|
||||
|
||||
; Call RtlVirtualUnwind: rcx = 0, rdx/r8/r9 taken from [r9+8], [r9+0] and
; [r9+16]; stack arguments are [r9+40], the addresses r9+56 and r9+24, and 0.
mov rsi,r9
|
||||
xor rcx,rcx
|
||||
mov rdx,QWORD PTR[8+rsi]
|
||||
mov r8,QWORD PTR[rsi]
|
||||
mov r9,QWORD PTR[16+rsi]
|
||||
mov r10,QWORD PTR[40+rsi]
|
||||
lea r11,QWORD PTR[56+rsi]
|
||||
lea r12,QWORD PTR[24+rsi]
|
||||
mov QWORD PTR[32+rsp],r10
|
||||
mov QWORD PTR[40+rsp],r11
|
||||
mov QWORD PTR[48+rsp],r12
|
||||
mov QWORD PTR[56+rsp],rcx
|
||||
call QWORD PTR[__imp_RtlVirtualUnwind]
|
||||
|
||||
; Return 1 and restore everything pushed on entry.
mov eax,1
|
||||
add rsp,64
|
||||
popfq
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rbp
|
||||
pop rbx
|
||||
pop rdi
|
||||
pop rsi
|
||||
DB 0F3h,0C3h ;repret
|
||||
se_handler ENDP
|
||||
|
||||
.text$ ENDS
|
||||
; Win64 structured-exception tables for this file.
; .pdata holds one entry per covered code range: begin address, end address
; and the address of its unwind information, all image-relative.
.pdata SEGMENT READONLY ALIGN(4)
|
||||
ALIGN 4
|
||||
; Range 1: _mul_1x1 .. $L$end_mul_1x1, described by static unwind codes.
DD imagerel _mul_1x1
|
||||
DD imagerel $L$end_mul_1x1
|
||||
DD imagerel $L$SEH_info_1x1
|
||||
|
||||
; Range 2: the table-driven path of bn_GF2m_mul_2x2 (from $L$vanilla_mul_2x2
; to $L$end_mul_2x2), handled by se_handler.
DD imagerel $L$vanilla_mul_2x2
|
||||
DD imagerel $L$end_mul_2x2
|
||||
DD imagerel $L$SEH_info_2x2
|
||||
.pdata ENDS
|
||||
.xdata SEGMENT READONLY ALIGN(8)
|
||||
ALIGN 8
|
||||
$L$SEH_info_1x1::
|
||||
; Unwind header: version 1 with no flags, prologue size 7 bytes, 2 code slots,
; no frame register.
DB 001h,007h,002h,000h
|
||||
; One unwind code at prologue offset 7: large stack allocation of 011h*8 = 136
; bytes, matching "sub rsp,128+8" in _mul_1x1.
DB 007h,001h,011h,000h
|
||||
|
||||
$L$SEH_info_2x2::
|
||||
; Unwind header: version 1 with the exception-handler flag set (9 = 1 | 8<<... as
; encoded by the generator), no unwind codes; the handler address follows.
DB 9,0,0,0
|
||||
DD imagerel se_handler
|
||||
|
||||
.xdata ENDS
|
||||
END
|
990
deps/openssl/asm/x64-win32-masm/bn/x86_64-mont5.asm
vendored
Normal file
990
deps/openssl/asm/x64-win32-masm/bn/x86_64-mont5.asm
vendored
Normal file
@ -0,0 +1,990 @@
|
||||
OPTION DOTNAME
|
||||
.text$ SEGMENT ALIGN(64) 'CODE'
|
||||
|
||||
PUBLIC bn_mul_mont_gather5
|
||||
|
||||
ALIGN 64
|
||||
; ----------------------------------------------------------------------
; bn_mul_mont_gather5 -- Montgomery multiplication whose multiplier words
; are fetched from a table through a masked gather.
; ABI:  Microsoft x64. Arguments arrive in rcx, rdx, r8, r9 and on the
;       stack at [40+rsp], [48+rsp] and [56+rsp]; the prologue moves them
;       into rdi, rsi, rdx, rcx, r8, r9 and r10d respectively.
; Out:  rax = 1.
; When the count in r9d is a multiple of 4 and at least 8, control jumps
; to $L$mul4x_enter (inside bn_mul4x_mont_gather5); otherwise the
; one-word-per-iteration loops below are used.
; rbx, rbp, r12-r15, xmm6 and xmm7 are saved on the stack and reloaded
; before return; rdi/rsi are kept in the caller's home slots.
; ----------------------------------------------------------------------
bn_mul_mont_gather5 PROC PUBLIC
|
||||
mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
|
||||
mov QWORD PTR[16+rsp],rsi
|
||||
mov rax,rsp
|
||||
$L$SEH_begin_bn_mul_mont_gather5::
|
||||
mov rdi,rcx ; 1st argument
|
||||
mov rsi,rdx ; 2nd argument
|
||||
mov rdx,r8 ; 3rd argument
|
||||
mov rcx,r9 ; 4th argument
|
||||
mov r8,QWORD PTR[40+rsp] ; 5th argument (stack)
|
||||
mov r9,QWORD PTR[48+rsp] ; 6th argument (stack): word count
|
||||
|
||||
|
||||
test r9d,3 ; count not a multiple of 4 -> generic path
|
||||
jnz $L$mul_enter
|
||||
cmp r9d,8 ; count below 8 -> generic path
|
||||
jb $L$mul_enter
|
||||
jmp $L$mul4x_enter
|
||||
|
||||
ALIGN 16
|
||||
$L$mul_enter::
|
||||
mov r9d,r9d ; zero-extend the 32-bit count into r9
|
||||
mov r10d,DWORD PTR[56+rsp] ; 7th argument (stack): gather index
|
||||
push rbx
|
||||
push rbp
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
lea rsp,QWORD PTR[((-40))+rsp] ; room for the xmm6/xmm7 save area
|
||||
movaps XMMWORD PTR[rsp],xmm6
|
||||
movaps XMMWORD PTR[16+rsp],xmm7
|
||||
$L$mul_alloca::
|
||||
mov rax,rsp ; rax = stack pointer to restore in the epilogue
|
||||
lea r11,QWORD PTR[2+r9]
|
||||
neg r11
|
||||
lea rsp,QWORD PTR[r11*8+rsp] ; reserve (count+2) qwords of scratch
|
||||
and rsp,-1024 ; round the scratch base down to a 1024-byte boundary
|
||||
|
||||
mov QWORD PTR[8+r9*8+rsp],rax ; saved stack pointer lives above the scratch words
|
||||
$L$mul_body::
|
||||
mov r12,rdx
|
||||
mov r11,r10
|
||||
shr r10,3
|
||||
and r11,7 ; r11 = index & 7
|
||||
not r10
|
||||
lea rax,QWORD PTR[$L$magic_masks]
|
||||
and r10,3 ; r10 = ~(index >> 3) & 3 selects the mask window
|
||||
lea r12,QWORD PTR[96+r11*8+r12]
|
||||
movq xmm4,QWORD PTR[r10*8+rax] ; xmm4..xmm7 = four consecutive mask qwords
|
||||
movq xmm5,QWORD PTR[8+r10*8+rax]
|
||||
movq xmm6,QWORD PTR[16+r10*8+rax]
|
||||
movq xmm7,QWORD PTR[24+r10*8+rax]
|
||||
|
||||
movq xmm0,QWORD PTR[((-96))+r12] ; load four candidates 64 bytes apart,
|
||||
movq xmm1,QWORD PTR[((-32))+r12] ; mask each one and OR them together
|
||||
pand xmm0,xmm4
|
||||
movq xmm2,QWORD PTR[32+r12]
|
||||
pand xmm1,xmm5
|
||||
movq xmm3,QWORD PTR[96+r12]
|
||||
pand xmm2,xmm6
|
||||
por xmm0,xmm1
|
||||
pand xmm3,xmm7
|
||||
por xmm0,xmm2
|
||||
lea r12,QWORD PTR[256+r12] ; advance to the next table row
|
||||
por xmm0,xmm3
|
||||
|
||||
DB 102,72,15,126,195 ; movq rbx,xmm0 (gathered multiplier word)
|
||||
|
||||
mov r8,QWORD PTR[r8] ; r8 = value the 5th argument points to
|
||||
mov rax,QWORD PTR[rsi]
|
||||
|
||||
xor r14,r14 ; r14 = outer loop counter
|
||||
xor r15,r15 ; r15 = inner loop counter
|
||||
|
||||
movq xmm0,QWORD PTR[((-96))+r12] ; start gathering the next multiplier word
|
||||
movq xmm1,QWORD PTR[((-32))+r12]
|
||||
pand xmm0,xmm4
|
||||
movq xmm2,QWORD PTR[32+r12]
|
||||
pand xmm1,xmm5
|
||||
|
||||
mov rbp,r8
|
||||
mul rbx
|
||||
mov r10,rax
|
||||
mov rax,QWORD PTR[rcx]
|
||||
|
||||
movq xmm3,QWORD PTR[96+r12]
|
||||
pand xmm2,xmm6
|
||||
por xmm0,xmm1
|
||||
pand xmm3,xmm7
|
||||
|
||||
imul rbp,r10 ; rbp = r8 * low product word (reduction multiplier)
|
||||
mov r11,rdx
|
||||
|
||||
por xmm0,xmm2
|
||||
lea r12,QWORD PTR[256+r12]
|
||||
por xmm0,xmm3
|
||||
|
||||
mul rbp
|
||||
add r10,rax
|
||||
mov rax,QWORD PTR[8+rsi]
|
||||
adc rdx,0
|
||||
mov r13,rdx
|
||||
|
||||
lea r15,QWORD PTR[1+r15]
|
||||
jmp $L$1st_enter
|
||||
|
||||
ALIGN 16
|
||||
$L$1st::
|
||||
add r13,rax
|
||||
mov rax,QWORD PTR[r15*8+rsi]
|
||||
adc rdx,0
|
||||
add r13,r11
|
||||
mov r11,r10
|
||||
adc rdx,0
|
||||
mov QWORD PTR[((-16))+r15*8+rsp],r13 ; store result word into the scratch area
|
||||
mov r13,rdx
|
||||
|
||||
$L$1st_enter::
|
||||
mul rbx
|
||||
add r11,rax
|
||||
mov rax,QWORD PTR[r15*8+rcx]
|
||||
adc rdx,0
|
||||
lea r15,QWORD PTR[1+r15]
|
||||
mov r10,rdx
|
||||
|
||||
mul rbp
|
||||
cmp r15,r9
|
||||
jne $L$1st
|
||||
|
||||
DB 102,72,15,126,195 ; movq rbx,xmm0 (next multiplier word)
|
||||
|
||||
add r13,rax
|
||||
mov rax,QWORD PTR[rsi]
|
||||
adc rdx,0
|
||||
add r13,r11
|
||||
adc rdx,0
|
||||
mov QWORD PTR[((-16))+r15*8+rsp],r13
|
||||
mov r13,rdx
|
||||
mov r11,r10
|
||||
|
||||
xor rdx,rdx
|
||||
add r13,r11
|
||||
adc rdx,0
|
||||
mov QWORD PTR[((-8))+r9*8+rsp],r13
|
||||
mov QWORD PTR[r9*8+rsp],rdx ; top carry word
|
||||
|
||||
lea r14,QWORD PTR[1+r14]
|
||||
jmp $L$outer
|
||||
ALIGN 16
|
||||
$L$outer::
|
||||
xor r15,r15
|
||||
mov rbp,r8
|
||||
mov r10,QWORD PTR[rsp]
|
||||
|
||||
movq xmm0,QWORD PTR[((-96))+r12] ; gather the multiplier word for the next pass
|
||||
movq xmm1,QWORD PTR[((-32))+r12]
|
||||
pand xmm0,xmm4
|
||||
movq xmm2,QWORD PTR[32+r12]
|
||||
pand xmm1,xmm5
|
||||
|
||||
mul rbx
|
||||
add r10,rax
|
||||
mov rax,QWORD PTR[rcx]
|
||||
adc rdx,0
|
||||
|
||||
movq xmm3,QWORD PTR[96+r12]
|
||||
pand xmm2,xmm6
|
||||
por xmm0,xmm1
|
||||
pand xmm3,xmm7
|
||||
|
||||
imul rbp,r10
|
||||
mov r11,rdx
|
||||
|
||||
por xmm0,xmm2
|
||||
lea r12,QWORD PTR[256+r12]
|
||||
por xmm0,xmm3
|
||||
|
||||
mul rbp
|
||||
add r10,rax
|
||||
mov rax,QWORD PTR[8+rsi]
|
||||
adc rdx,0
|
||||
mov r10,QWORD PTR[8+rsp]
|
||||
mov r13,rdx
|
||||
|
||||
lea r15,QWORD PTR[1+r15]
|
||||
jmp $L$inner_enter
|
||||
|
||||
ALIGN 16
|
||||
$L$inner::
|
||||
add r13,rax
|
||||
mov rax,QWORD PTR[r15*8+rsi]
|
||||
adc rdx,0
|
||||
add r13,r10
|
||||
mov r10,QWORD PTR[r15*8+rsp]
|
||||
adc rdx,0
|
||||
mov QWORD PTR[((-16))+r15*8+rsp],r13
|
||||
mov r13,rdx
|
||||
|
||||
$L$inner_enter::
|
||||
mul rbx
|
||||
add r11,rax
|
||||
mov rax,QWORD PTR[r15*8+rcx]
|
||||
adc rdx,0
|
||||
add r10,r11
|
||||
mov r11,rdx
|
||||
adc r11,0
|
||||
lea r15,QWORD PTR[1+r15]
|
||||
|
||||
mul rbp
|
||||
cmp r15,r9
|
||||
jne $L$inner
|
||||
|
||||
DB 102,72,15,126,195 ; movq rbx,xmm0 (next multiplier word)
|
||||
|
||||
add r13,rax
|
||||
mov rax,QWORD PTR[rsi]
|
||||
adc rdx,0
|
||||
add r13,r10
|
||||
mov r10,QWORD PTR[r15*8+rsp]
|
||||
adc rdx,0
|
||||
mov QWORD PTR[((-16))+r15*8+rsp],r13
|
||||
mov r13,rdx
|
||||
|
||||
xor rdx,rdx
|
||||
add r13,r11
|
||||
adc rdx,0
|
||||
add r13,r10
|
||||
adc rdx,0
|
||||
mov QWORD PTR[((-8))+r9*8+rsp],r13
|
||||
mov QWORD PTR[r9*8+rsp],rdx
|
||||
|
||||
lea r14,QWORD PTR[1+r14]
|
||||
cmp r14,r9
|
||||
jl $L$outer
|
||||
|
||||
xor r14,r14 ; r14 = 0; xor also clears CF for the first sbb below
|
||||
mov rax,QWORD PTR[rsp]
|
||||
lea rsi,QWORD PTR[rsp] ; rsi = scratch area
|
||||
mov r15,r9
|
||||
jmp $L$sub
|
||||
ALIGN 16
|
||||
$L$sub:: sbb rax,QWORD PTR[r14*8+rcx]
|
||||
mov QWORD PTR[r14*8+rdi],rax ; output = scratch - [rcx], word by word with borrow
|
||||
mov rax,QWORD PTR[8+r14*8+rsi]
|
||||
lea r14,QWORD PTR[1+r14]
|
||||
dec r15 ; dec leaves CF (the running borrow) untouched
|
||||
jnz $L$sub
|
||||
|
||||
sbb rax,0 ; fold the final borrow into the top word: rax becomes the select mask
|
||||
xor r14,r14
|
||||
and rsi,rax ; rsi = (scratch & mask) | (output & ~mask):
|
||||
not rax ; picks the copy source without branching
|
||||
mov rcx,rdi
|
||||
and rcx,rax
|
||||
mov r15,r9
|
||||
or rsi,rcx
|
||||
ALIGN 16
|
||||
$L$copy::
|
||||
mov rax,QWORD PTR[r14*8+rsi]
|
||||
mov QWORD PTR[r14*8+rsp],r14 ; overwrite the scratch word with the loop index
|
||||
mov QWORD PTR[r14*8+rdi],rax
|
||||
lea r14,QWORD PTR[1+r14]
|
||||
sub r15,1
|
||||
jnz $L$copy
|
||||
|
||||
mov rsi,QWORD PTR[8+r9*8+rsp] ; rsi = stack pointer saved at $L$mul_alloca
|
||||
mov rax,1 ; return value
|
||||
movaps xmm6,XMMWORD PTR[rsi] ; restore non-volatile xmm6/xmm7
|
||||
movaps xmm7,XMMWORD PTR[16+rsi]
|
||||
lea rsi,QWORD PTR[40+rsi]
|
||||
mov r15,QWORD PTR[rsi]
|
||||
mov r14,QWORD PTR[8+rsi]
|
||||
mov r13,QWORD PTR[16+rsi]
|
||||
mov r12,QWORD PTR[24+rsi]
|
||||
mov rbp,QWORD PTR[32+rsi]
|
||||
mov rbx,QWORD PTR[40+rsi]
|
||||
lea rsp,QWORD PTR[48+rsi]
|
||||
$L$mul_epilogue::
|
||||
mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
|
||||
mov rsi,QWORD PTR[16+rsp]
|
||||
DB 0F3h,0C3h ;repret
|
||||
$L$SEH_end_bn_mul_mont_gather5::
|
||||
bn_mul_mont_gather5 ENDP
|
||||
|
||||
ALIGN 16
|
||||
; ----------------------------------------------------------------------
; bn_mul4x_mont_gather5 -- variant of bn_mul_mont_gather5 whose inner
; loops handle four words per iteration (r15 advances by 4).
; ABI:  Microsoft x64, same argument layout as bn_mul_mont_gather5.
;       bn_mul_mont_gather5 also jumps straight to $L$mul4x_enter after
;       running its own prologue.
; Out:  rax = 1.
; rdi is reused as an accumulator, so the output pointer is parked on
; the stack at $L$mul4x_body and reloaded before the final subtraction.
; The scratch area is overwritten with zeros while the result is copied.
; ----------------------------------------------------------------------
bn_mul4x_mont_gather5 PROC PRIVATE
|
||||
mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
|
||||
mov QWORD PTR[16+rsp],rsi
|
||||
mov rax,rsp
|
||||
$L$SEH_begin_bn_mul4x_mont_gather5::
|
||||
mov rdi,rcx ; 1st argument
|
||||
mov rsi,rdx ; 2nd argument
|
||||
mov rdx,r8 ; 3rd argument
|
||||
mov rcx,r9 ; 4th argument
|
||||
mov r8,QWORD PTR[40+rsp] ; 5th argument (stack)
|
||||
mov r9,QWORD PTR[48+rsp] ; 6th argument (stack): word count
|
||||
|
||||
|
||||
$L$mul4x_enter::
|
||||
mov r9d,r9d ; zero-extend the 32-bit count into r9
|
||||
mov r10d,DWORD PTR[56+rsp] ; 7th argument (stack): gather index
|
||||
push rbx
|
||||
push rbp
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
lea rsp,QWORD PTR[((-40))+rsp] ; room for the xmm6/xmm7 save area
|
||||
movaps XMMWORD PTR[rsp],xmm6
|
||||
movaps XMMWORD PTR[16+rsp],xmm7
|
||||
$L$mul4x_alloca::
|
||||
mov rax,rsp ; rax = stack pointer to restore in the epilogue
|
||||
lea r11,QWORD PTR[4+r9]
|
||||
neg r11
|
||||
lea rsp,QWORD PTR[r11*8+rsp] ; reserve (count+4) qwords of scratch
|
||||
and rsp,-1024 ; round the scratch base down to a 1024-byte boundary
|
||||
|
||||
mov QWORD PTR[8+r9*8+rsp],rax ; saved stack pointer
|
||||
$L$mul4x_body::
|
||||
mov QWORD PTR[16+r9*8+rsp],rdi ; park the output pointer; rdi is an accumulator below
|
||||
mov r12,rdx
|
||||
mov r11,r10
|
||||
shr r10,3
|
||||
and r11,7 ; r11 = index & 7
|
||||
not r10
|
||||
lea rax,QWORD PTR[$L$magic_masks]
|
||||
and r10,3 ; r10 = ~(index >> 3) & 3 selects the mask window
|
||||
lea r12,QWORD PTR[96+r11*8+r12]
|
||||
movq xmm4,QWORD PTR[r10*8+rax] ; xmm4..xmm7 = four consecutive mask qwords
|
||||
movq xmm5,QWORD PTR[8+r10*8+rax]
|
||||
movq xmm6,QWORD PTR[16+r10*8+rax]
|
||||
movq xmm7,QWORD PTR[24+r10*8+rax]
|
||||
|
||||
movq xmm0,QWORD PTR[((-96))+r12] ; load four candidates 64 bytes apart,
|
||||
movq xmm1,QWORD PTR[((-32))+r12] ; mask each one and OR them together
|
||||
pand xmm0,xmm4
|
||||
movq xmm2,QWORD PTR[32+r12]
|
||||
pand xmm1,xmm5
|
||||
movq xmm3,QWORD PTR[96+r12]
|
||||
pand xmm2,xmm6
|
||||
por xmm0,xmm1
|
||||
pand xmm3,xmm7
|
||||
por xmm0,xmm2
|
||||
lea r12,QWORD PTR[256+r12] ; advance to the next table row
|
||||
por xmm0,xmm3
|
||||
|
||||
DB 102,72,15,126,195 ; movq rbx,xmm0 (gathered multiplier word)
|
||||
mov r8,QWORD PTR[r8] ; r8 = value the 5th argument points to
|
||||
mov rax,QWORD PTR[rsi]
|
||||
|
||||
xor r14,r14 ; r14 = outer loop counter
|
||||
xor r15,r15 ; r15 = inner loop counter
|
||||
|
||||
movq xmm0,QWORD PTR[((-96))+r12] ; start gathering the next multiplier word
|
||||
movq xmm1,QWORD PTR[((-32))+r12]
|
||||
pand xmm0,xmm4
|
||||
movq xmm2,QWORD PTR[32+r12]
|
||||
pand xmm1,xmm5
|
||||
|
||||
mov rbp,r8
|
||||
mul rbx
|
||||
mov r10,rax
|
||||
mov rax,QWORD PTR[rcx]
|
||||
|
||||
movq xmm3,QWORD PTR[96+r12]
|
||||
pand xmm2,xmm6
|
||||
por xmm0,xmm1
|
||||
pand xmm3,xmm7
|
||||
|
||||
imul rbp,r10 ; rbp = r8 * low product word (reduction multiplier)
|
||||
mov r11,rdx
|
||||
|
||||
por xmm0,xmm2
|
||||
lea r12,QWORD PTR[256+r12]
|
||||
por xmm0,xmm3
|
||||
|
||||
mul rbp
|
||||
add r10,rax
|
||||
mov rax,QWORD PTR[8+rsi]
|
||||
adc rdx,0
|
||||
mov rdi,rdx
|
||||
|
||||
mul rbx
|
||||
add r11,rax
|
||||
mov rax,QWORD PTR[8+rcx]
|
||||
adc rdx,0
|
||||
mov r10,rdx
|
||||
|
||||
mul rbp
|
||||
add rdi,rax
|
||||
mov rax,QWORD PTR[16+rsi]
|
||||
adc rdx,0
|
||||
add rdi,r11
|
||||
lea r15,QWORD PTR[4+r15]
|
||||
adc rdx,0
|
||||
mov QWORD PTR[rsp],rdi
|
||||
mov r13,rdx
|
||||
jmp $L$1st4x
|
||||
ALIGN 16
|
||||
$L$1st4x::
|
||||
mul rbx
|
||||
add r10,rax
|
||||
mov rax,QWORD PTR[((-16))+r15*8+rcx]
|
||||
adc rdx,0
|
||||
mov r11,rdx
|
||||
|
||||
mul rbp
|
||||
add r13,rax
|
||||
mov rax,QWORD PTR[((-8))+r15*8+rsi]
|
||||
adc rdx,0
|
||||
add r13,r10
|
||||
adc rdx,0
|
||||
mov QWORD PTR[((-24))+r15*8+rsp],r13
|
||||
mov rdi,rdx
|
||||
|
||||
mul rbx
|
||||
add r11,rax
|
||||
mov rax,QWORD PTR[((-8))+r15*8+rcx]
|
||||
adc rdx,0
|
||||
mov r10,rdx
|
||||
|
||||
mul rbp
|
||||
add rdi,rax
|
||||
mov rax,QWORD PTR[r15*8+rsi]
|
||||
adc rdx,0
|
||||
add rdi,r11
|
||||
adc rdx,0
|
||||
mov QWORD PTR[((-16))+r15*8+rsp],rdi
|
||||
mov r13,rdx
|
||||
|
||||
mul rbx
|
||||
add r10,rax
|
||||
mov rax,QWORD PTR[r15*8+rcx]
|
||||
adc rdx,0
|
||||
mov r11,rdx
|
||||
|
||||
mul rbp
|
||||
add r13,rax
|
||||
mov rax,QWORD PTR[8+r15*8+rsi]
|
||||
adc rdx,0
|
||||
add r13,r10
|
||||
adc rdx,0
|
||||
mov QWORD PTR[((-8))+r15*8+rsp],r13
|
||||
mov rdi,rdx
|
||||
|
||||
mul rbx
|
||||
add r11,rax
|
||||
mov rax,QWORD PTR[8+r15*8+rcx]
|
||||
adc rdx,0
|
||||
lea r15,QWORD PTR[4+r15] ; four words consumed per iteration
|
||||
mov r10,rdx
|
||||
|
||||
mul rbp
|
||||
add rdi,rax
|
||||
mov rax,QWORD PTR[((-16))+r15*8+rsi]
|
||||
adc rdx,0
|
||||
add rdi,r11
|
||||
adc rdx,0
|
||||
mov QWORD PTR[((-32))+r15*8+rsp],rdi
|
||||
mov r13,rdx
|
||||
cmp r15,r9
|
||||
jl $L$1st4x
|
||||
|
||||
mul rbx
|
||||
add r10,rax
|
||||
mov rax,QWORD PTR[((-16))+r15*8+rcx]
|
||||
adc rdx,0
|
||||
mov r11,rdx
|
||||
|
||||
mul rbp
|
||||
add r13,rax
|
||||
mov rax,QWORD PTR[((-8))+r15*8+rsi]
|
||||
adc rdx,0
|
||||
add r13,r10
|
||||
adc rdx,0
|
||||
mov QWORD PTR[((-24))+r15*8+rsp],r13
|
||||
mov rdi,rdx
|
||||
|
||||
mul rbx
|
||||
add r11,rax
|
||||
mov rax,QWORD PTR[((-8))+r15*8+rcx]
|
||||
adc rdx,0
|
||||
mov r10,rdx
|
||||
|
||||
mul rbp
|
||||
add rdi,rax
|
||||
mov rax,QWORD PTR[rsi]
|
||||
adc rdx,0
|
||||
add rdi,r11
|
||||
adc rdx,0
|
||||
mov QWORD PTR[((-16))+r15*8+rsp],rdi
|
||||
mov r13,rdx
|
||||
|
||||
DB 102,72,15,126,195 ; movq rbx,xmm0 (next multiplier word)
|
||||
|
||||
xor rdi,rdi
|
||||
add r13,r10
|
||||
adc rdi,0
|
||||
mov QWORD PTR[((-8))+r15*8+rsp],r13
|
||||
mov QWORD PTR[r15*8+rsp],rdi ; top carry word
|
||||
|
||||
lea r14,QWORD PTR[1+r14]
|
||||
ALIGN 4
|
||||
$L$outer4x::
|
||||
xor r15,r15
|
||||
movq xmm0,QWORD PTR[((-96))+r12] ; gather the multiplier word for the next pass
|
||||
movq xmm1,QWORD PTR[((-32))+r12]
|
||||
pand xmm0,xmm4
|
||||
movq xmm2,QWORD PTR[32+r12]
|
||||
pand xmm1,xmm5
|
||||
|
||||
mov r10,QWORD PTR[rsp]
|
||||
mov rbp,r8
|
||||
mul rbx
|
||||
add r10,rax
|
||||
mov rax,QWORD PTR[rcx]
|
||||
adc rdx,0
|
||||
|
||||
movq xmm3,QWORD PTR[96+r12]
|
||||
pand xmm2,xmm6
|
||||
por xmm0,xmm1
|
||||
pand xmm3,xmm7
|
||||
|
||||
imul rbp,r10
|
||||
mov r11,rdx
|
||||
|
||||
por xmm0,xmm2
|
||||
lea r12,QWORD PTR[256+r12]
|
||||
por xmm0,xmm3
|
||||
|
||||
mul rbp
|
||||
add r10,rax
|
||||
mov rax,QWORD PTR[8+rsi]
|
||||
adc rdx,0
|
||||
mov rdi,rdx
|
||||
|
||||
mul rbx
|
||||
add r11,rax
|
||||
mov rax,QWORD PTR[8+rcx]
|
||||
adc rdx,0
|
||||
add r11,QWORD PTR[8+rsp]
|
||||
adc rdx,0
|
||||
mov r10,rdx
|
||||
|
||||
mul rbp
|
||||
add rdi,rax
|
||||
mov rax,QWORD PTR[16+rsi]
|
||||
adc rdx,0
|
||||
add rdi,r11
|
||||
lea r15,QWORD PTR[4+r15]
|
||||
adc rdx,0
|
||||
mov r13,rdx
|
||||
jmp $L$inner4x
|
||||
ALIGN 16
|
||||
$L$inner4x::
|
||||
mul rbx
|
||||
add r10,rax
|
||||
mov rax,QWORD PTR[((-16))+r15*8+rcx]
|
||||
adc rdx,0
|
||||
add r10,QWORD PTR[((-16))+r15*8+rsp] ; add the word left by the previous pass
|
||||
adc rdx,0
|
||||
mov r11,rdx
|
||||
|
||||
mul rbp
|
||||
add r13,rax
|
||||
mov rax,QWORD PTR[((-8))+r15*8+rsi]
|
||||
adc rdx,0
|
||||
add r13,r10
|
||||
adc rdx,0
|
||||
mov QWORD PTR[((-32))+r15*8+rsp],rdi
|
||||
mov rdi,rdx
|
||||
|
||||
mul rbx
|
||||
add r11,rax
|
||||
mov rax,QWORD PTR[((-8))+r15*8+rcx]
|
||||
adc rdx,0
|
||||
add r11,QWORD PTR[((-8))+r15*8+rsp]
|
||||
adc rdx,0
|
||||
mov r10,rdx
|
||||
|
||||
mul rbp
|
||||
add rdi,rax
|
||||
mov rax,QWORD PTR[r15*8+rsi]
|
||||
adc rdx,0
|
||||
add rdi,r11
|
||||
adc rdx,0
|
||||
mov QWORD PTR[((-24))+r15*8+rsp],r13
|
||||
mov r13,rdx
|
||||
|
||||
mul rbx
|
||||
add r10,rax
|
||||
mov rax,QWORD PTR[r15*8+rcx]
|
||||
adc rdx,0
|
||||
add r10,QWORD PTR[r15*8+rsp]
|
||||
adc rdx,0
|
||||
mov r11,rdx
|
||||
|
||||
mul rbp
|
||||
add r13,rax
|
||||
mov rax,QWORD PTR[8+r15*8+rsi]
|
||||
adc rdx,0
|
||||
add r13,r10
|
||||
adc rdx,0
|
||||
mov QWORD PTR[((-16))+r15*8+rsp],rdi
|
||||
mov rdi,rdx
|
||||
|
||||
mul rbx
|
||||
add r11,rax
|
||||
mov rax,QWORD PTR[8+r15*8+rcx]
|
||||
adc rdx,0
|
||||
add r11,QWORD PTR[8+r15*8+rsp]
|
||||
adc rdx,0
|
||||
lea r15,QWORD PTR[4+r15] ; four words consumed per iteration
|
||||
mov r10,rdx
|
||||
|
||||
mul rbp
|
||||
add rdi,rax
|
||||
mov rax,QWORD PTR[((-16))+r15*8+rsi]
|
||||
adc rdx,0
|
||||
add rdi,r11
|
||||
adc rdx,0
|
||||
mov QWORD PTR[((-40))+r15*8+rsp],r13
|
||||
mov r13,rdx
|
||||
cmp r15,r9
|
||||
jl $L$inner4x
|
||||
|
||||
mul rbx
|
||||
add r10,rax
|
||||
mov rax,QWORD PTR[((-16))+r15*8+rcx]
|
||||
adc rdx,0
|
||||
add r10,QWORD PTR[((-16))+r15*8+rsp]
|
||||
adc rdx,0
|
||||
mov r11,rdx
|
||||
|
||||
mul rbp
|
||||
add r13,rax
|
||||
mov rax,QWORD PTR[((-8))+r15*8+rsi]
|
||||
adc rdx,0
|
||||
add r13,r10
|
||||
adc rdx,0
|
||||
mov QWORD PTR[((-32))+r15*8+rsp],rdi
|
||||
mov rdi,rdx
|
||||
|
||||
mul rbx
|
||||
add r11,rax
|
||||
mov rax,QWORD PTR[((-8))+r15*8+rcx]
|
||||
adc rdx,0
|
||||
add r11,QWORD PTR[((-8))+r15*8+rsp]
|
||||
adc rdx,0
|
||||
lea r14,QWORD PTR[1+r14] ; outer counter advanced here; lea leaves the flags alone
|
||||
mov r10,rdx
|
||||
|
||||
mul rbp
|
||||
add rdi,rax
|
||||
mov rax,QWORD PTR[rsi]
|
||||
adc rdx,0
|
||||
add rdi,r11
|
||||
adc rdx,0
|
||||
mov QWORD PTR[((-24))+r15*8+rsp],r13
|
||||
mov r13,rdx
|
||||
|
||||
DB 102,72,15,126,195 ; movq rbx,xmm0 (next multiplier word)
|
||||
mov QWORD PTR[((-16))+r15*8+rsp],rdi
|
||||
|
||||
xor rdi,rdi
|
||||
add r13,r10
|
||||
adc rdi,0
|
||||
add r13,QWORD PTR[r9*8+rsp] ; add the carry word from the previous pass
|
||||
adc rdi,0
|
||||
mov QWORD PTR[((-8))+r15*8+rsp],r13
|
||||
mov QWORD PTR[r15*8+rsp],rdi
|
||||
|
||||
cmp r14,r9
|
||||
jl $L$outer4x
|
||||
mov rdi,QWORD PTR[16+r9*8+rsp] ; reload the output pointer parked at $L$mul4x_body
|
||||
mov rax,QWORD PTR[rsp]
|
||||
pxor xmm0,xmm0 ; xmm0 = 0, used to wipe the scratch area below
|
||||
mov rdx,QWORD PTR[8+rsp]
|
||||
shr r9,2 ; r9 = count / 4 (restored by the shl after $L$copy4x)
|
||||
lea rsi,QWORD PTR[rsp] ; rsi = scratch area
|
||||
xor r14,r14
|
||||
|
||||
sub rax,QWORD PTR[rcx] ; start of the borrow chain: output = scratch - [rcx]
|
||||
mov rbx,QWORD PTR[16+rsi]
|
||||
mov rbp,QWORD PTR[24+rsi]
|
||||
sbb rdx,QWORD PTR[8+rcx]
|
||||
lea r15,QWORD PTR[((-1))+r9]
|
||||
jmp $L$sub4x
|
||||
ALIGN 16
|
||||
$L$sub4x::
|
||||
mov QWORD PTR[r14*8+rdi],rax
|
||||
mov QWORD PTR[8+r14*8+rdi],rdx
|
||||
sbb rbx,QWORD PTR[16+r14*8+rcx]
|
||||
mov rax,QWORD PTR[32+r14*8+rsi]
|
||||
mov rdx,QWORD PTR[40+r14*8+rsi]
|
||||
sbb rbp,QWORD PTR[24+r14*8+rcx]
|
||||
mov QWORD PTR[16+r14*8+rdi],rbx
|
||||
mov QWORD PTR[24+r14*8+rdi],rbp
|
||||
sbb rax,QWORD PTR[32+r14*8+rcx]
|
||||
mov rbx,QWORD PTR[48+r14*8+rsi]
|
||||
mov rbp,QWORD PTR[56+r14*8+rsi]
|
||||
sbb rdx,QWORD PTR[40+r14*8+rcx]
|
||||
lea r14,QWORD PTR[4+r14]
|
||||
dec r15 ; dec leaves CF (the running borrow) untouched
|
||||
jnz $L$sub4x
|
||||
|
||||
mov QWORD PTR[r14*8+rdi],rax
|
||||
mov rax,QWORD PTR[32+r14*8+rsi]
|
||||
sbb rbx,QWORD PTR[16+r14*8+rcx]
|
||||
mov QWORD PTR[8+r14*8+rdi],rdx
|
||||
sbb rbp,QWORD PTR[24+r14*8+rcx]
|
||||
mov QWORD PTR[16+r14*8+rdi],rbx
|
||||
|
||||
sbb rax,0 ; fold the final borrow into the top word: rax becomes the select mask
|
||||
mov QWORD PTR[24+r14*8+rdi],rbp
|
||||
xor r14,r14
|
||||
and rsi,rax ; rsi = (scratch & mask) | (output & ~mask):
|
||||
not rax ; picks the copy source without branching
|
||||
mov rcx,rdi
|
||||
and rcx,rax
|
||||
lea r15,QWORD PTR[((-1))+r9]
|
||||
or rsi,rcx
|
||||
|
||||
movdqu xmm1,XMMWORD PTR[rsi]
|
||||
movdqa XMMWORD PTR[rsp],xmm0 ; zero the scratch words as they are copied out
|
||||
movdqu XMMWORD PTR[rdi],xmm1
|
||||
jmp $L$copy4x
|
||||
ALIGN 16
|
||||
$L$copy4x::
|
||||
movdqu xmm2,XMMWORD PTR[16+r14*1+rsi]
|
||||
movdqu xmm1,XMMWORD PTR[32+r14*1+rsi]
|
||||
movdqa XMMWORD PTR[16+r14*1+rsp],xmm0
|
||||
movdqu XMMWORD PTR[16+r14*1+rdi],xmm2
|
||||
movdqa XMMWORD PTR[32+r14*1+rsp],xmm0
|
||||
movdqu XMMWORD PTR[32+r14*1+rdi],xmm1
|
||||
lea r14,QWORD PTR[32+r14] ; r14 is a byte offset here (32 bytes per iteration)
|
||||
dec r15
|
||||
jnz $L$copy4x
|
||||
|
||||
shl r9,2 ; undo the earlier shr so r9 indexes the saved stack pointer again
|
||||
movdqu xmm2,XMMWORD PTR[16+r14*1+rsi]
|
||||
movdqa XMMWORD PTR[16+r14*1+rsp],xmm0
|
||||
movdqu XMMWORD PTR[16+r14*1+rdi],xmm2
|
||||
mov rsi,QWORD PTR[8+r9*8+rsp] ; rsi = stack pointer saved at $L$mul4x_alloca
|
||||
mov rax,1 ; return value
|
||||
movaps xmm6,XMMWORD PTR[rsi] ; restore non-volatile xmm6/xmm7
|
||||
movaps xmm7,XMMWORD PTR[16+rsi]
|
||||
lea rsi,QWORD PTR[40+rsi]
|
||||
mov r15,QWORD PTR[rsi]
|
||||
mov r14,QWORD PTR[8+rsi]
|
||||
mov r13,QWORD PTR[16+rsi]
|
||||
mov r12,QWORD PTR[24+rsi]
|
||||
mov rbp,QWORD PTR[32+rsi]
|
||||
mov rbx,QWORD PTR[40+rsi]
|
||||
lea rsp,QWORD PTR[48+rsi]
|
||||
$L$mul4x_epilogue::
|
||||
mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
|
||||
mov rsi,QWORD PTR[16+rsp]
|
||||
DB 0F3h,0C3h ;repret
|
||||
$L$SEH_end_bn_mul4x_mont_gather5::
|
||||
bn_mul4x_mont_gather5 ENDP
|
||||
PUBLIC bn_scatter5
|
||||
|
||||
ALIGN 16
|
||||
; ----------------------------------------------------------------------
; bn_scatter5 -- store a vector of qwords into one column of a table.
; ABI:   Microsoft x64, leaf routine (no stack use, no unwind data).
; In:    rcx = source words
;        rdx = number of words (0 is allowed: nothing is written)
;        r8  = table base
;        r9  = column index
; Effect: for i in [0, rdx): qword [r8 + r9*8 + i*256] = qword [rcx + i*8]
; Clobb: rax, rcx, rdx, r8, flags
; ----------------------------------------------------------------------
bn_scatter5 PROC PUBLIC
        test    rdx,rdx                         ; nothing to do for a zero count
        jz      $L$scatter_epilogue
        lea     r8,QWORD PTR[r9*8+r8]           ; r8 = &table[column]
$L$scatter::
        mov     rax,QWORD PTR[rcx]              ; next source word
        mov     QWORD PTR[r8],rax               ; drop it into the current row
        lea     rcx,QWORD PTR[8+rcx]            ; advance source by one word
        lea     r8,QWORD PTR[256+r8]            ; advance destination by one 256-byte row
        sub     rdx,1
        jnz     $L$scatter
$L$scatter_epilogue::
        DB      0F3h,0C3h               ;repret
bn_scatter5 ENDP
|
||||
|
||||
PUBLIC bn_gather5
|
||||
|
||||
ALIGN 16
|
||||
; ----------------------------------------------------------------------
; bn_gather5 -- read one column of a table into a vector of qwords using
; masked loads, so the addresses touched do not depend on the column's
; upper index bits.
; ABI:   Microsoft x64.
; In:    rcx = destination words
;        rdx = number of words (must be non-zero: the loop tests the
;              count only after the first store)
;        r8  = table base
;        r9  = column index
; Effect: for i in [0, rdx): qword [rcx + i*8] = qword [r8 + r9*8 + i*256]
;         (for the indices the mask table covers: r9 >> 3 in 0..3)
; Clobb: rax, rcx, rdx, r8, r9, r11, xmm0-xmm5, flags
; Stack: 40 bytes; xmm6/xmm7 are non-volatile on Win64, so they are saved
;        in the prologue and reloaded in the epilogue.
;
; The prologue is emitted as raw bytes so its length stays exactly 13
; bytes, which is what the unwind record $L$SEH_info_bn_gather5 declares.
; ----------------------------------------------------------------------
bn_gather5 PROC PUBLIC
$L$SEH_begin_bn_gather5::

        DB      048h,083h,0ech,028h             ; sub rsp,40
        DB      00fh,029h,034h,024h             ; movaps XMMWORD PTR[rsp],xmm6
        DB      00fh,029h,07ch,024h,010h        ; movaps XMMWORD PTR[16+rsp],xmm7

        mov     r11,r9
        shr     r9,3
        and     r11,7                           ; r11 = index & 7
        not     r9
        lea     rax,QWORD PTR[$L$magic_masks]
        and     r9,3                            ; r9 = ~(index >> 3) & 3 selects the mask window
        lea     r8,QWORD PTR[96+r11*8+r8]
        movq    xmm4,QWORD PTR[r9*8+rax]        ; four consecutive mask qwords; the table
        movq    xmm5,QWORD PTR[8+r9*8+rax]      ; holds a single all-ones qword, so exactly
        movq    xmm6,QWORD PTR[16+r9*8+rax]     ; one of xmm4..xmm7 is all ones
        movq    xmm7,QWORD PTR[24+r9*8+rax]
        jmp     $L$gather
ALIGN 16
$L$gather::
        movq    xmm0,QWORD PTR[((-96))+r8]      ; load four candidates 64 bytes apart,
        movq    xmm1,QWORD PTR[((-32))+r8]      ; mask each one and OR them together
        pand    xmm0,xmm4
        movq    xmm2,QWORD PTR[32+r8]
        pand    xmm1,xmm5
        movq    xmm3,QWORD PTR[96+r8]
        pand    xmm2,xmm6
        por     xmm0,xmm1
        pand    xmm3,xmm7
        por     xmm0,xmm2
        lea     r8,QWORD PTR[256+r8]            ; next 256-byte row
        por     xmm0,xmm3

        movq    QWORD PTR[rcx],xmm0
        lea     rcx,QWORD PTR[8+rcx]
        sub     rdx,1
        jnz     $L$gather

        ; Restore the caller's xmm6/xmm7 from the slots the prologue filled.
        ; (The previous code had the operands reversed and stored the mask
        ; registers over the saved values, returning with xmm6/xmm7 clobbered.)
        movaps  xmm6,XMMWORD PTR[rsp]
        movaps  xmm7,XMMWORD PTR[16+rsp]
        lea     rsp,QWORD PTR[40+rsp]
        DB      0F3h,0C3h               ;repret
$L$SEH_end_bn_gather5::
bn_gather5 ENDP
|
||||
ALIGN 64
|
||||
$L$magic_masks::
|
||||
DD 0,0,0,0,0,0,-1,-1
|
||||
DD 0,0,0,0,0,0,0,0
|
||||
DB 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
|
||||
DB 112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115
|
||||
DB 99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111
|
||||
DB 114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79
|
||||
DB 71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111
|
||||
DB 112,101,110,115,115,108,46,111,114,103,62,0
|
||||
EXTERN __imp_RtlVirtualUnwind:NEAR
|
||||
|
||||
ALIGN 16
|
||||
; ----------------------------------------------------------------------
; mul_handler -- exception handler named by the .xdata records of
; bn_mul_mont_gather5 and bn_mul4x_mont_gather5.
; It reads the faulting instruction pointer from the record at r8 and
; compares it with the three image-relative labels stored after the
; handler pointer in .xdata (alloca, body, epilogue) to decide where the
; interrupted function's original stack pointer can be found. It then
; patches the saved-register fields of that record, copies it, calls
; RtlVirtualUnwind and returns 1.
; NOTE(review): r8/r9 are presumably the CONTEXT and DISPATCHER_CONTEXT
; pointers of the Win64 language-specific handler signature, and the
; numeric offsets used below match those structures (e.g. Rax=120,
; Rsp=152, Rip=248, Xmm6=512; ImageBase=8, ContextRecord=40,
; HandlerData=56) -- confirm against winnt.h.
; ----------------------------------------------------------------------
mul_handler PROC PRIVATE
|
||||
push rsi
|
||||
push rdi
|
||||
push rbx
|
||||
push rbp
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
pushfq
|
||||
sub rsp,64 ; home space plus stack arguments for the call below
|
||||
|
||||
mov rax,QWORD PTR[120+r8] ; saved rax (both functions set rax = rsp on entry)
|
||||
mov rbx,QWORD PTR[248+r8] ; saved instruction pointer
|
||||
|
||||
mov rsi,QWORD PTR[8+r9] ; image base
|
||||
mov r11,QWORD PTR[56+r9] ; handler data: the three labels from .xdata
|
||||
|
||||
mov r10d,DWORD PTR[r11]
|
||||
lea r10,QWORD PTR[r10*1+rsi] ; absolute address of the alloca label
|
||||
cmp rbx,r10
|
||||
jb $L$common_seh_tail ; before alloca: saved rax is used as the stack pointer
|
||||
|
||||
lea rax,QWORD PTR[88+rax] ; skip 6 pushes (48 bytes) + 40-byte xmm area
|
||||
|
||||
mov r10d,DWORD PTR[4+r11]
|
||||
lea r10,QWORD PTR[r10*1+rsi] ; absolute address of the body label
|
||||
cmp rbx,r10
|
||||
jb $L$common_seh_tail
|
||||
|
||||
mov rax,QWORD PTR[152+r8] ; saved stack pointer
|
||||
|
||||
mov r10d,DWORD PTR[8+r11]
|
||||
lea r10,QWORD PTR[r10*1+rsi] ; absolute address of the epilogue label
|
||||
cmp rbx,r10
|
||||
jae $L$common_seh_tail ; at or past the epilogue: registers already restored
|
||||
|
||||
mov r10,QWORD PTR[192+r8] ; saved r9 (the word count inside the function body)
|
||||
mov rax,QWORD PTR[8+r10*8+rax] ; stack pointer the function stored at [8+r9*8+rsp]
|
||||
|
||||
movaps xmm0,XMMWORD PTR[rax] ; saved xmm6
|
||||
movaps xmm1,XMMWORD PTR[16+rax] ; saved xmm7
|
||||
lea rax,QWORD PTR[88+rax] ; rax = stack pointer as it was before the pushes
|
||||
|
||||
mov rbx,QWORD PTR[((-8))+rax]
|
||||
mov rbp,QWORD PTR[((-16))+rax]
|
||||
mov r12,QWORD PTR[((-24))+rax]
|
||||
mov r13,QWORD PTR[((-32))+rax]
|
||||
mov r14,QWORD PTR[((-40))+rax]
|
||||
mov r15,QWORD PTR[((-48))+rax]
|
||||
mov QWORD PTR[144+r8],rbx ; write the recovered registers back into the record
|
||||
mov QWORD PTR[160+r8],rbp
|
||||
mov QWORD PTR[216+r8],r12
|
||||
mov QWORD PTR[224+r8],r13
|
||||
mov QWORD PTR[232+r8],r14
|
||||
mov QWORD PTR[240+r8],r15
|
||||
movups XMMWORD PTR[512+r8],xmm0
|
||||
movups XMMWORD PTR[528+r8],xmm1
|
||||
|
||||
$L$common_seh_tail::
|
||||
mov rdi,QWORD PTR[8+rax] ; rdi/rsi were stored by the WIN64 prologue at [8+rsp]/[16+rsp]
|
||||
mov rsi,QWORD PTR[16+rax]
|
||||
mov QWORD PTR[152+r8],rax
|
||||
mov QWORD PTR[168+r8],rsi
|
||||
mov QWORD PTR[176+r8],rdi
|
||||
|
||||
mov rdi,QWORD PTR[40+r9] ; destination of the copy
|
||||
mov rsi,r8 ; source of the copy
|
||||
mov ecx,154 ; 154 qwords
|
||||
DD 0a548f3fch ; cld; rep movsq
|
||||
|
||||
|
||||
mov rsi,r9
|
||||
xor rcx,rcx ; 1st argument = 0
|
||||
mov rdx,QWORD PTR[8+rsi] ; 2nd argument
|
||||
mov r8,QWORD PTR[rsi] ; 3rd argument
|
||||
mov r9,QWORD PTR[16+rsi] ; 4th argument
|
||||
mov r10,QWORD PTR[40+rsi]
|
||||
lea r11,QWORD PTR[56+rsi]
|
||||
lea r12,QWORD PTR[24+rsi]
|
||||
mov QWORD PTR[32+rsp],r10 ; 5th argument
|
||||
mov QWORD PTR[40+rsp],r11 ; 6th argument
|
||||
mov QWORD PTR[48+rsp],r12 ; 7th argument
|
||||
mov QWORD PTR[56+rsp],rcx ; 8th argument = 0
|
||||
call QWORD PTR[__imp_RtlVirtualUnwind]
|
||||
|
||||
mov eax,1 ; handler result
|
||||
add rsp,64
|
||||
popfq
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rbp
|
||||
pop rbx
|
||||
pop rdi
|
||||
pop rsi
|
||||
DB 0F3h,0C3h ;repret
|
||||
mul_handler ENDP
|
||||
|
||||
.text$ ENDS
|
||||
.pdata SEGMENT READONLY ALIGN(4)
|
||||
ALIGN 4
|
||||
DD imagerel $L$SEH_begin_bn_mul_mont_gather5
|
||||
DD imagerel $L$SEH_end_bn_mul_mont_gather5
|
||||
DD imagerel $L$SEH_info_bn_mul_mont_gather5
|
||||
|
||||
DD imagerel $L$SEH_begin_bn_mul4x_mont_gather5
|
||||
DD imagerel $L$SEH_end_bn_mul4x_mont_gather5
|
||||
DD imagerel $L$SEH_info_bn_mul4x_mont_gather5
|
||||
|
||||
DD imagerel $L$SEH_begin_bn_gather5
|
||||
DD imagerel $L$SEH_end_bn_gather5
|
||||
DD imagerel $L$SEH_info_bn_gather5
|
||||
|
||||
.pdata ENDS
|
||||
; Unwind information referenced from the .pdata entries of this file.
; The two multiplication routines use a handler-based record: a 4-byte
; header, the image-relative address of mul_handler, then three
; image-relative labels that mul_handler reads as its handler data.
; bn_gather5 uses a plain unwind-code record describing its prologue.
.xdata SEGMENT READONLY ALIGN(8)
|
||||
ALIGN 8
|
||||
$L$SEH_info_bn_mul_mont_gather5::
|
||||
DB 9,0,0,0 ; NOTE(review): presumably version 1 + UNW_FLAG_EHANDLER, no unwind codes -- confirm
|
||||
DD imagerel mul_handler
|
||||
DD imagerel $L$mul_alloca,imagerel $L$mul_body,imagerel $L$mul_epilogue
|
||||
|
||||
ALIGN 8
|
||||
$L$SEH_info_bn_mul4x_mont_gather5::
|
||||
DB 9,0,0,0
|
||||
DD imagerel mul_handler
|
||||
DD imagerel $L$mul4x_alloca,imagerel $L$mul4x_body,imagerel $L$mul4x_epilogue
|
||||
|
||||
ALIGN 8
|
||||
$L$SEH_info_bn_gather5::
|
||||
DB 001h,00dh,005h,000h ; prologue length 00dh = 13 bytes (the three DB lines in bn_gather5), 5 code slots
|
||||
DB 00dh,078h,001h,000h ; at offset 13: xmm7 saved at 1*16 = [16+rsp]
|
||||
|
||||
DB 008h,068h,000h,000h ; at offset 8: xmm6 saved at [rsp]
|
||||
|
||||
DB 004h,042h,000h,000h ; at offset 4: 40-byte stack allocation; last two bytes pad the record
|
||||
|
||||
ALIGN 8
|
||||
|
||||
.xdata ENDS
|
||||
END
|
19
deps/openssl/openssl.gyp
vendored
19
deps/openssl/openssl.gyp
vendored
@ -696,6 +696,7 @@
|
||||
'LIB_BN_ASM',
|
||||
'MD5_ASM',
|
||||
'OPENSSL_BN_ASM',
|
||||
'OPENSSL_BN_ASM_MONT',
|
||||
'OPENSSL_CPUID_OBJ',
|
||||
'RIP_ASM',
|
||||
'RMD160_ASM',
|
||||
@ -730,12 +731,18 @@
|
||||
]
|
||||
}],
|
||||
['OS!="win" and OS!="mac" and target_arch=="x64"', {
|
||||
'defines': [
|
||||
'OPENSSL_BN_ASM_MONT5',
|
||||
'OPENSSL_BN_ASM_GF2m',
|
||||
],
|
||||
'sources': [
|
||||
'asm/x64-elf-gas/aes/aes-x86_64.s',
|
||||
'asm/x64-elf-gas/aes/aesni-x86_64.s',
|
||||
'asm/x64-elf-gas/aes/aesni-sha1-x86_64.s',
|
||||
'asm/x64-elf-gas/bn/modexp512-x86_64.s',
|
||||
'asm/x64-elf-gas/bn/x86_64-mont.s',
|
||||
'asm/x64-elf-gas/bn/x86_64-mont5.s',
|
||||
'asm/x64-elf-gas/bn/x86_64-gf2m.s',
|
||||
'asm/x64-elf-gas/camellia/cmll-x86_64.s',
|
||||
'asm/x64-elf-gas/md5/md5-x86_64.s',
|
||||
'asm/x64-elf-gas/rc4/rc4-x86_64.s',
|
||||
@ -779,12 +786,18 @@
|
||||
]
|
||||
}],
|
||||
['OS=="mac" and target_arch=="x64"', {
|
||||
'defines': [
|
||||
'OPENSSL_BN_ASM_MONT5',
|
||||
'OPENSSL_BN_ASM_GF2m',
|
||||
],
|
||||
'sources': [
|
||||
'asm/x64-macosx-gas/aes/aes-x86_64.s',
|
||||
'asm/x64-macosx-gas/aes/aesni-x86_64.s',
|
||||
'asm/x64-macosx-gas/aes/aesni-sha1-x86_64.s',
|
||||
'asm/x64-macosx-gas/bn/modexp512-x86_64.s',
|
||||
'asm/x64-macosx-gas/bn/x86_64-mont.s',
|
||||
'asm/x64-macosx-gas/bn/x86_64-mont5.s',
|
||||
'asm/x64-macosx-gas/bn/x86_64-gf2m.s',
|
||||
'asm/x64-macosx-gas/camellia/cmll-x86_64.s',
|
||||
'asm/x64-macosx-gas/md5/md5-x86_64.s',
|
||||
'asm/x64-macosx-gas/rc4/rc4-x86_64.s',
|
||||
@ -847,12 +860,18 @@
|
||||
]
|
||||
}],
|
||||
['OS=="win" and target_arch=="x64"', {
|
||||
'defines': [
|
||||
'OPENSSL_BN_ASM_MONT5',
|
||||
'OPENSSL_BN_ASM_GF2m',
|
||||
],
|
||||
'sources': [
|
||||
'asm/x64-win32-masm/aes/aes-x86_64.asm',
|
||||
'asm/x64-win32-masm/aes/aesni-x86_64.asm',
|
||||
'asm/x64-win32-masm/aes/aesni-sha1-x86_64.asm',
|
||||
'asm/x64-win32-masm/bn/modexp512-x86_64.asm',
|
||||
'asm/x64-win32-masm/bn/x86_64-mont.asm',
|
||||
'asm/x64-win32-masm/bn/x86_64-mont5.asm',
|
||||
'asm/x64-win32-masm/bn/x86_64-gf2m.asm',
|
||||
'asm/x64-win32-masm/camellia/cmll-x86_64.asm',
|
||||
'asm/x64-win32-masm/md5/md5-x86_64.asm',
|
||||
'asm/x64-win32-masm/rc4/rc4-x86_64.asm',
|
||||
|
Loading…
x
Reference in New Issue
Block a user