/* * ==================================================================== * Written by Andy Polyakov for the OpenSSL * project. Rights for redistribution and usage in source and binary * forms are granted according to the OpenSSL license. * ==================================================================== * * sha256/512_block procedure for x86_64. * * 40% improvement over compiler-generated code on Opteron. On EM64T * sha256 was observed to run >80% faster and sha512 - >40%. No magical * tricks, just straight implementation... I really wonder why gcc * [being armed with inline assembler] fails to generate as fast code. * The only thing which is cool about this module is that it's very * same instruction sequence used for both SHA-256 and SHA-512. In * former case the instructions operate on 32-bit operands, while in * latter - on 64-bit ones. All I had to do is to get one flavor right, * the other one passed the test right away:-) * * sha256_block runs in ~1005 cycles on Opteron, which gives you * asymptotic performance of 64*1000/1005=63.7MBps times CPU clock * frequency in GHz. sha512_block runs in ~1275 cycles, which results * in 128*1000/1275=100MBps per GHz. Is there room for improvement? * Well, if you compare it to IA-64 implementation, which maintains * X[16] in register bank[!], tends to 4 instructions per CPU clock * cycle and runs in 1003 cycles, 1275 is very good result for 3-way * issue Opteron pipeline and X[16] maintained in memory. So that *if* * there is a way to improve it, *then* the only way would be to try to * offload X[16] updates to SSE unit, but that would require "deeper" * loop unroll, which in turn would naturally cause size blow-up, not * to mention increased complexity! And once again, only *if* it's * actually possible to noticeably improve overall ILP, instruction * level parallelism, on a given CPU implementation in this case. * * Special note on Intel EM64T. 
While Opteron CPU exhibits perfect
 * performance ratio of 1.5 between 64- and 32-bit flavors [see above],
 * [currently available] EM64T CPUs apparently are far from it. On the
 * contrary, 64-bit version, sha512_block, is ~30% *slower* than 32-bit
 * sha256_block:-( This is presumably because 64-bit shifts/rotates
 * apparently are not atomic instructions, but implemented in microcode.
 */

/*
 * OpenSolaris OS modifications
 *
 * Sun elects to use this software under the BSD license.
 *
 * This source originates from OpenSSL file sha512-x86_64.pl at
 * ftp://ftp.openssl.org/snapshot/openssl-0.9.8-stable-SNAP-20080131.tar.gz
 * (presumably for future OpenSSL release 0.9.8h), with these changes:
 *
 * 1. Added perl "use strict" and declared variables.
 *
 * 2. Added OpenSolaris ENTRY_NP/SET_SIZE macros from
 * /usr/include/sys/asm_linkage.h, .ident keywords, and lint(1B) guards.
 *
 * 3. Removed x86_64-xlate.pl script (not needed for as(1) or gas(1)
 * assemblers). Replaced the .picmeup macro with assembler code.
 *
 * 4. Added 8 to $ctx, as OpenSolaris OS has an extra 4-byte field, "algotype",
 * at the beginning of SHA2_CTX (the next field is 8-byte aligned).
 */

/*
 * This file was generated by a perl script (sha512-x86_64.pl) that was
 * used to generate sha256 and sha512 variants from the same code base.
 * The comments from the original file have been pasted above.
*/

#if defined(lint) || defined(__lint)

/*
 * NOTE(review): the header names on the three #include lines of this file
 * were missing from the reviewed text.  sys/asm_linkage.h is the header the
 * modification notes above name as the source of ENTRY_NP/SET_SIZE;
 * sys/stdint.h and sha2/sha2.h are assumed to be the headers that declare
 * size_t and SHA2_CTX in this tree -- confirm before merging.
 */
#include <sys/stdint.h>
#include <sha2/sha2.h>

void
SHA256TransformBlocks(SHA2_CTX *ctx, const void *in, size_t num)
{
	(void) ctx, (void) in, (void) num;
}

#else
#define _ASM
#include <sys/asm_linkage.h>

/*
 * Round templates.
 *
 * The 64 SHA-256 rounds are emitted as one straight-line group of 16 rounds
 * (message words fetched from the input) followed by a second group of 16
 * rounds (message words derived from the X[] ring) that is executed three
 * times.  The eight working variables never move between registers; instead
 * every round is instantiated with the register names rotated by one, which
 * is why each template takes the a..h registers as arguments.
 *
 * Fixed register roles inside the templates:
 *	%r12d		T1
 *	%r13d-%r15d	scratch
 *	%rbp		address of K256
 *	%rdi		round counter (index into K256)
 *	%rsp		base of the 16-word X[] ring
 */

/*
 * One compression round.  On entry %r12d holds the message word of this
 * round; it is stored into ring slot \slot and then folded into T1.
 * The instruction order is the interleaving of the generated original.
 */
.macro	SHA256_ROUND_CORE slot, ra, rb, rc, rd, re, rf, rg, rh
	mov	\re,%r13d
	mov	\re,%r14d
	mov	\rf,%r15d

	ror	$6,%r13d
	ror	$11,%r14d
	xor	\rg,%r15d			/* f^g */

	xor	%r14d,%r13d
	ror	$14,%r14d			/* 11+14: e ror 25 */
	and	\re,%r15d			/* (f^g)&e */
	mov	%r12d,4*\slot(%rsp)		/* X[slot] = message word */

	xor	%r14d,%r13d			/* Sigma1(e) */
	xor	\rg,%r15d			/* Ch(e,f,g)=((f^g)&e)^g */
	add	\rh,%r12d			/* T1+=h */

	mov	\ra,\rh				/* old h consumed; reuse as new a */
	add	%r13d,%r12d			/* T1+=Sigma1(e) */

	add	%r15d,%r12d			/* T1+=Ch(e,f,g) */
	mov	\ra,%r13d
	mov	\ra,%r14d

	ror	$2,\rh
	ror	$13,%r13d
	mov	\ra,%r15d
	add	(%rbp,%rdi,4),%r12d		/* T1+=K[round] */

	xor	%r13d,\rh
	ror	$9,%r13d			/* 13+9: a ror 22 */
	or	\rc,%r14d			/* a|c */

	xor	%r13d,\rh			/* h=Sigma0(a) */
	and	\rc,%r15d			/* a&c */
	add	%r12d,\rd			/* d+=T1 */

	and	\rb,%r14d			/* (a|c)&b */
	add	%r12d,\rh			/* h+=T1 */

	or	%r15d,%r14d			/* Maj(a,b,c)=((a|c)&b)|(a&c) */
	lea	1(%rdi),%rdi			/* round++ (lea leaves flags alone) */

	add	%r14d,\rh			/* h+=Maj(a,b,c) */
.endm

/*
 * Rounds 0..15: the message word is 32-bit word \slot of the input block,
 * byte-swapped after the load.
 */
.macro	SHA256_ROUND_00_15 slot, ra, rb, rc, rd, re, rf, rg, rh
	mov	4*\slot(%rsi),%r12d
	bswap	%r12d
	SHA256_ROUND_CORE \slot, \ra, \rb, \rc, \rd, \re, \rf, \rg, \rh
.endm

/*
 * Rounds 16..63: the message word is
 *	sigma0(X[\s1]) + sigma1(X[\s14]) + X[\s9] + X[\slot]
 * where the slot arguments are the ring indices i, i+1, i+9 and i+14
 * (all modulo 16), passed explicitly by the caller.
 */
.macro	SHA256_ROUND_16_XX slot, s1, s9, s14, ra, rb, rc, rd, re, rf, rg, rh
	mov	4*\s1(%rsp),%r13d
	mov	4*\s14(%rsp),%r12d

	mov	%r13d,%r15d

	shr	$3,%r13d
	ror	$7,%r15d

	xor	%r15d,%r13d
	ror	$11,%r15d			/* 7+11: x ror 18 */

	xor	%r15d,%r13d			/* sigma0(X[(i+1)&0xf]) */
	mov	%r12d,%r14d

	shr	$10,%r12d
	ror	$17,%r14d

	xor	%r14d,%r12d
	ror	$2,%r14d			/* 17+2: x ror 19 */

	xor	%r14d,%r12d			/* sigma1(X[(i+14)&0xf]) */

	add	%r13d,%r12d

	add	4*\s9(%rsp),%r12d

	add	4*\slot(%rsp),%r12d
	SHA256_ROUND_CORE \slot, \ra, \rb, \rc, \rd, \re, \rf, \rg, \rh
.endm

/*
 * void SHA256TransformBlocks(SHA2_CTX *ctx, const void *in, size_t num)
 *
 * Runs the SHA-256 compression function over num consecutive 64-byte
 * blocks starting at in, updating the eight 32-bit chaining words that
 * start 8 bytes into *ctx.
 *
 * In:	%rdi = ctx, %rsi = in, %rdx = num (count of 64-byte blocks)
 * Out:	nothing; the chaining words in *ctx are updated in place.
 *
 * num == 0 returns immediately without reading in or writing *ctx.  The
 * block loop below is bottom-tested, so without this check a zero count
 * would still consume one block.
 *
 * %rbx, %rbp and %r12-%r15 are saved and restored.  Everything else used
 * here (%rax, %rcx, %rdx, %rsi, %rdi, %r8-%r11, flags) is clobbered.
 *
 * Stack frame, 64-byte aligned, addressed from %rsp:
 *	 0 .. 63	X[16], ring of message schedule words
 *	16*4+0*8	ctx + 8 (address of the chaining words)
 *	16*4+1*8	in as passed (stored, not read back here)
 *	16*4+2*8	end pointer, in + num*64
 *	16*4+3*8	%rsp as it was after the register pushes
 */
ENTRY_NP(SHA256TransformBlocks)
.cfi_startproc
	/* Nothing pushed yet, so the shared RET below is valid from here. */
	test	%rdx,%rdx
	jz	.Lno_blocks

	movq	%rsp, %rax
.cfi_def_cfa_register	%rax
	push	%rbx
.cfi_offset	%rbx,-16
	push	%rbp
.cfi_offset	%rbp,-24
	push	%r12
.cfi_offset	%r12,-32
	push	%r13
.cfi_offset	%r13,-40
	push	%r14
.cfi_offset	%r14,-48
	push	%r15
.cfi_offset	%r15,-56
	mov	%rsp,%rbp			/* copy %rsp */
	shl	$4,%rdx				/* num*16 */
	sub	$16*4+4*8,%rsp
	lea	(%rsi,%rdx,4),%rdx		/* inp+num*16*4 */
	and	$-64,%rsp			/* align stack frame */
	add	$8,%rdi				/* Skip OpenSolaris field, "algotype" */
	mov	%rdi,16*4+0*8(%rsp)		/* save ctx, 1st arg */
	mov	%rsi,16*4+1*8(%rsp)		/* save inp, 2nd arg */
	mov	%rdx,16*4+2*8(%rsp)		/* save end pointer, "3rd" arg */
	mov	%rbp,16*4+3*8(%rsp)		/* save copy of %rsp */
	/*
	 * CFA = *(%rsp+88) + 56, i.e. the %rsp copy saved just above plus
	 * the six pushed registers and the return address.  Generated with
	 *	echo ".cfi_cfa_expression %rsp+88,deref,+56" |
	 *		openssl/crypto/perlasm/x86_64-xlate.pl
	 */
.cfi_escape	0x0f,0x06,0x77,0xd8,0x00,0x06,0x23,0x38

	/*
	 * Point %rbp at the round constants.  This stands in for the
	 * perlasm .picmeup pseudo-directive; a single RIP-relative lea
	 * yields the same address as the former two-instruction sequence.
	 */
	lea	K256(%rip),%rbp

	/* a..h <- chaining words */
	mov	4*0(%rdi),%eax
	mov	4*1(%rdi),%ebx
	mov	4*2(%rdi),%ecx
	mov	4*3(%rdi),%edx
	mov	4*4(%rdi),%r8d
	mov	4*5(%rdi),%r9d
	mov	4*6(%rdi),%r10d
	mov	4*7(%rdi),%r11d
	jmp	.Lloop

.align	16
.Lloop:						/* once per 64-byte block */
	xor	%rdi,%rdi			/* round = 0; ctx is reloaded later */

	/*			slot	a     b     c     d     e     f     g     h */
	SHA256_ROUND_00_15	0,	%eax, %ebx, %ecx, %edx, %r8d, %r9d, %r10d,%r11d
	SHA256_ROUND_00_15	1,	%r11d,%eax, %ebx, %ecx, %edx, %r8d, %r9d, %r10d
	SHA256_ROUND_00_15	2,	%r10d,%r11d,%eax, %ebx, %ecx, %edx, %r8d, %r9d
	SHA256_ROUND_00_15	3,	%r9d, %r10d,%r11d,%eax, %ebx, %ecx, %edx, %r8d
	SHA256_ROUND_00_15	4,	%r8d, %r9d, %r10d,%r11d,%eax, %ebx, %ecx, %edx
	SHA256_ROUND_00_15	5,	%edx, %r8d, %r9d, %r10d,%r11d,%eax, %ebx, %ecx
	SHA256_ROUND_00_15	6,	%ecx, %edx, %r8d, %r9d, %r10d,%r11d,%eax, %ebx
	SHA256_ROUND_00_15	7,	%ebx, %ecx, %edx, %r8d, %r9d, %r10d,%r11d,%eax
	SHA256_ROUND_00_15	8,	%eax, %ebx, %ecx, %edx, %r8d, %r9d, %r10d,%r11d
	SHA256_ROUND_00_15	9,	%r11d,%eax, %ebx, %ecx, %edx, %r8d, %r9d, %r10d
	SHA256_ROUND_00_15	10,	%r10d,%r11d,%eax, %ebx, %ecx, %edx, %r8d, %r9d
	SHA256_ROUND_00_15	11,	%r9d, %r10d,%r11d,%eax, %ebx, %ecx, %edx, %r8d
	SHA256_ROUND_00_15	12,	%r8d, %r9d, %r10d,%r11d,%eax, %ebx, %ecx, %edx
	SHA256_ROUND_00_15	13,	%edx, %r8d, %r9d, %r10d,%r11d,%eax, %ebx, %ecx
	SHA256_ROUND_00_15	14,	%ecx, %edx, %r8d, %r9d, %r10d,%r11d,%eax, %ebx
	SHA256_ROUND_00_15	15,	%ebx, %ecx, %edx, %r8d, %r9d, %r10d,%r11d,%eax
	jmp	.Lrounds_16_xx

.align	16
.Lrounds_16_xx:					/* 16 rounds per pass, until round 64 */
	/*			i   i+1 i+9 i+14  a     b     c     d     e     f     g     h */
	SHA256_ROUND_16_XX	0,  1,  9,  14,	%eax, %ebx, %ecx, %edx, %r8d, %r9d, %r10d,%r11d
	SHA256_ROUND_16_XX	1,  2,  10, 15,	%r11d,%eax, %ebx, %ecx, %edx, %r8d, %r9d, %r10d
	SHA256_ROUND_16_XX	2,  3,  11, 0,	%r10d,%r11d,%eax, %ebx, %ecx, %edx, %r8d, %r9d
	SHA256_ROUND_16_XX	3,  4,  12, 1,	%r9d, %r10d,%r11d,%eax, %ebx, %ecx, %edx, %r8d
	SHA256_ROUND_16_XX	4,  5,  13, 2,	%r8d, %r9d, %r10d,%r11d,%eax, %ebx, %ecx, %edx
	SHA256_ROUND_16_XX	5,  6,  14, 3,	%edx, %r8d, %r9d, %r10d,%r11d,%eax, %ebx, %ecx
	SHA256_ROUND_16_XX	6,  7,  15, 4,	%ecx, %edx, %r8d, %r9d, %r10d,%r11d,%eax, %ebx
	SHA256_ROUND_16_XX	7,  8,  0,  5,	%ebx, %ecx, %edx, %r8d, %r9d, %r10d,%r11d,%eax
	SHA256_ROUND_16_XX	8,  9,  1,  6,	%eax, %ebx, %ecx, %edx, %r8d, %r9d, %r10d,%r11d
	SHA256_ROUND_16_XX	9,  10, 2,  7,	%r11d,%eax, %ebx, %ecx, %edx, %r8d, %r9d, %r10d
	SHA256_ROUND_16_XX	10, 11, 3,  8,	%r10d,%r11d,%eax, %ebx, %ecx, %edx, %r8d, %r9d
	SHA256_ROUND_16_XX	11, 12, 4,  9,	%r9d, %r10d,%r11d,%eax, %ebx, %ecx, %edx, %r8d
	SHA256_ROUND_16_XX	12, 13, 5,  10,	%r8d, %r9d, %r10d,%r11d,%eax, %ebx, %ecx, %edx
	SHA256_ROUND_16_XX	13, 14, 6,  11,	%edx, %r8d, %r9d, %r10d,%r11d,%eax, %ebx, %ecx
	SHA256_ROUND_16_XX	14, 15, 7,  12,	%ecx, %edx, %r8d, %r9d, %r10d,%r11d,%eax, %ebx
	SHA256_ROUND_16_XX	15, 0,  8,  13,	%ebx, %ecx, %edx, %r8d, %r9d, %r10d,%r11d,%eax

	cmp	$64,%rdi
	jb	.Lrounds_16_xx

	mov	16*4+0*8(%rsp),%rdi		/* reload ctx+8 (rdi was the round counter) */
	lea	16*4(%rsi),%rsi			/* advance to the next input block */

	/* fold the previous chaining words into a..h */
	add	4*0(%rdi),%eax
	add	4*1(%rdi),%ebx
	add	4*2(%rdi),%ecx
	add	4*3(%rdi),%edx
	add	4*4(%rdi),%r8d
	add	4*5(%rdi),%r9d
	add	4*6(%rdi),%r10d
	add	4*7(%rdi),%r11d

	/* flags from this compare survive the mov stores below */
	cmp	16*4+2*8(%rsp),%rsi

	mov	%eax,4*0(%rdi)
	mov	%ebx,4*1(%rdi)
	mov	%ecx,4*2(%rdi)
	mov	%edx,4*3(%rdi)
	mov	%r8d,4*4(%rdi)
	mov	%r9d,4*5(%rdi)
	mov	%r10d,4*6(%rdi)
	mov	%r11d,4*7(%rdi)
	jb	.Lloop				/* more blocks before the end pointer */

	mov	16*4+3*8(%rsp),%rsp		/* drop the frame: back to post-push %rsp */
.cfi_def_cfa	%rsp,56
	pop	%r15
.cfi_adjust_cfa_offset	-8
.cfi_restore	%r15
	pop	%r14
.cfi_adjust_cfa_offset	-8
.cfi_restore	%r14
	pop	%r13
.cfi_adjust_cfa_offset	-8
.cfi_restore	%r13
	pop	%r12
.cfi_adjust_cfa_offset	-8
.cfi_restore	%r12
	pop	%rbp
.cfi_adjust_cfa_offset	-8
.cfi_restore	%rbp
	pop	%rbx
.cfi_adjust_cfa_offset	-8
.cfi_restore	%rbx

.Lno_blocks:					/* CFA is %rsp+8 on both paths here */
	RET
.cfi_endproc
SET_SIZE(SHA256TransformBlocks)

/*
 * K256: the 64 32-bit round constants, indexed by the round counter
 * through (%rbp,%rdi,4) in SHA256_ROUND_CORE.
 */
.section .rodata
.align	64
.type	K256,@object
K256:
	.long	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
	.long	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
	.long	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
	.long	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
	.long	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
	.long	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
	.long	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
	.long	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
	.long	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
	.long	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
	.long	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
	.long	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
	.long	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
	.long	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
	.long	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
	.long	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
#endif /* !lint && !__lint */

#ifdef __ELF__
.section .note.GNU-stack,"",%progbits
#endif