libbb/sha1: x86_64 version: generate from a script, optimize a bit

function                                             old     new   delta
sha1_process_block64                                3569    3502     -67

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
		| @@ -59,7 +59,7 @@ config SHA1_SMALL | ||||
| 	Trade binary size versus speed for the sha1 algorithm. | ||||
| 	                throughput MB/s   size of sha1_process_block64 | ||||
| 	value           486  x86-64       486   x86-64 | ||||
| 	0               367  367          3657  3570 | ||||
| 	0               367  375          3657  3502 | ||||
| 	1               224  229           654   732 | ||||
| 	2,3             200  195           358   380 | ||||
|  | ||||
|   | ||||
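For reference, the unrolled assembly changed below is only compiled in for the value-0 row of this table (see the "#if CONFIG_SHA1_SMALL == 0" guard added in this commit). In busybox .config terms that corresponds to a fragment like the following (illustrative; the option name is taken from that guard, the .config spelling is an assumption):

	# .config fragment (assumed): pick the largest/fastest SHA1 variant,
	# which enables the hand-unrolled sha1_process_block64 on x86-64 gcc builds
	CONFIG_SHA1_SMALL=0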
| @@ -1,23 +1,27 @@ | ||||
| ### Generated by hash_md5_sha_x86-64.S.sh ### | ||||
| #if defined(__GNUC__) && defined(__x86_64__) | ||||
|  | ||||
| #if CONFIG_SHA1_SMALL == 0 && defined(__GNUC__) && defined(__x86_64__) | ||||
| 	.section	.text.sha1_process_block64,"ax",@progbits | ||||
|         .globl  sha1_process_block64 | ||||
|         .hidden sha1_process_block64 | ||||
| 	.globl  sha1_process_block64 | ||||
| 	.hidden sha1_process_block64 | ||||
| 	.type	sha1_process_block64, @function | ||||
|  | ||||
| 	.balign	8	# allow decoders to fetch at least 4 first insns | ||||
| sha1_process_block64: | ||||
| 	pushq	%r15		# | ||||
| 	pushq	%r14		# | ||||
| 	pushq	%r13		# | ||||
| 	pushq	%r12		# | ||||
| 	pushq	%rbp		# | ||||
| 	pushq	%rbx		# | ||||
| 	pushq	%rdi		# we need ctx at the end | ||||
| 	pushq	%r15	# | ||||
| 	pushq	%r14	# | ||||
| 	pushq	%r13	# | ||||
| 	pushq	%r12	# | ||||
| 	pushq	%rbp	# | ||||
| 	pushq	%rbx	# | ||||
| 	pushq	%rdi	# we need ctx at the end | ||||
|  | ||||
| #Register and stack use: | ||||
| # eax..edx: a..d | ||||
| # ebp: e | ||||
| # esi,edi: temps | ||||
| # -32+4*n(%rsp),r8...r15: W[0..7,8..15] | ||||
| # (TODO: actually W[0..7] are used a bit more often, put _thme_ into r8..r15?) | ||||
|  | ||||
| 	movq	4*8(%rdi), %r8 | ||||
| 	bswapq	%r8 | ||||
| @@ -253,7 +257,7 @@ sha1_process_block64: | ||||
| 	xorl	%ecx, %edi		# ^d | ||||
| 	andl	%eax, %edi		# &b | ||||
| 	xorl	%ecx, %edi		# (((c ^ d) & b) ^ d) | ||||
| 	leal	0x5A827999(%rdx,%rsi),%edx # e += RCONST + W[n] | ||||
| 	leal	0x5A827999(%rdx,%rsi), %edx # e += RCONST + W[n & 15] | ||||
| 	addl	%edi, %edx		# e += (((c ^ d) & b) ^ d) | ||||
| 	movl	%ebp, %esi		# | ||||
| 	roll	$5, %esi		# rotl32(a,5) | ||||
| @@ -270,7 +274,7 @@ sha1_process_block64: | ||||
| 	xorl	%ebx, %edi		# ^d | ||||
| 	andl	%ebp, %edi		# &b | ||||
| 	xorl	%ebx, %edi		# (((c ^ d) & b) ^ d) | ||||
| 	leal	0x5A827999(%rcx,%rsi),%ecx # e += RCONST + W[n] | ||||
| 	leal	0x5A827999(%rcx,%rsi), %ecx # e += RCONST + W[n & 15] | ||||
| 	addl	%edi, %ecx		# e += (((c ^ d) & b) ^ d) | ||||
| 	movl	%edx, %esi		# | ||||
| 	roll	$5, %esi		# rotl32(a,5) | ||||
| @@ -287,7 +291,7 @@ sha1_process_block64: | ||||
| 	xorl	%eax, %edi		# ^d | ||||
| 	andl	%edx, %edi		# &b | ||||
| 	xorl	%eax, %edi		# (((c ^ d) & b) ^ d) | ||||
| 	leal	0x5A827999(%rbx,%rsi),%ebx # e += RCONST + W[n] | ||||
| 	leal	0x5A827999(%rbx,%rsi), %ebx # e += RCONST + W[n & 15] | ||||
| 	addl	%edi, %ebx		# e += (((c ^ d) & b) ^ d) | ||||
| 	movl	%ecx, %esi		# | ||||
| 	roll	$5, %esi		# rotl32(a,5) | ||||
| @@ -304,7 +308,7 @@ sha1_process_block64: | ||||
| 	xorl	%ebp, %edi		# ^d | ||||
| 	andl	%ecx, %edi		# &b | ||||
| 	xorl	%ebp, %edi		# (((c ^ d) & b) ^ d) | ||||
| 	leal	0x5A827999(%rax,%rsi),%eax # e += RCONST + W[n] | ||||
| 	leal	0x5A827999(%rax,%rsi), %eax # e += RCONST + W[n & 15] | ||||
| 	addl	%edi, %eax		# e += (((c ^ d) & b) ^ d) | ||||
| 	movl	%ebx, %esi		# | ||||
| 	roll	$5, %esi		# rotl32(a,5) | ||||
| @@ -320,7 +324,7 @@ sha1_process_block64: | ||||
| 	movl	%ecx, %edi		# c | ||||
| 	xorl	%edx, %edi		# ^d | ||||
| 	xorl	%ebx, %edi		# ^b | ||||
| 	leal	0x6ED9EBA1(%rbp,%rsi), %ebp # e += RCONST + mixed_W | ||||
| 	leal	0x6ED9EBA1(%rbp,%rsi), %ebp # e += RCONST + W[n & 15] | ||||
| 	addl	%edi, %ebp		# e += (c ^ d ^ b) | ||||
| 	movl	%eax, %esi		# | ||||
| 	roll	$5, %esi		# rotl32(a,5) | ||||
| @@ -336,7 +340,7 @@ sha1_process_block64: | ||||
| 	movl	%ebx, %edi		# c | ||||
| 	xorl	%ecx, %edi		# ^d | ||||
| 	xorl	%eax, %edi		# ^b | ||||
| 	leal	0x6ED9EBA1(%rdx,%rsi), %edx # e += RCONST + mixed_W | ||||
| 	leal	0x6ED9EBA1(%rdx,%rsi), %edx # e += RCONST + W[n & 15] | ||||
| 	addl	%edi, %edx		# e += (c ^ d ^ b) | ||||
| 	movl	%ebp, %esi		# | ||||
| 	roll	$5, %esi		# rotl32(a,5) | ||||
| @@ -352,7 +356,7 @@ sha1_process_block64: | ||||
| 	movl	%eax, %edi		# c | ||||
| 	xorl	%ebx, %edi		# ^d | ||||
| 	xorl	%ebp, %edi		# ^b | ||||
| 	leal	0x6ED9EBA1(%rcx,%rsi), %ecx # e += RCONST + mixed_W | ||||
| 	leal	0x6ED9EBA1(%rcx,%rsi), %ecx # e += RCONST + W[n & 15] | ||||
| 	addl	%edi, %ecx		# e += (c ^ d ^ b) | ||||
| 	movl	%edx, %esi		# | ||||
| 	roll	$5, %esi		# rotl32(a,5) | ||||
| @@ -368,135 +372,119 @@ sha1_process_block64: | ||||
| 	movl	%ebp, %edi		# c | ||||
| 	xorl	%eax, %edi		# ^d | ||||
| 	xorl	%edx, %edi		# ^b | ||||
| 	leal	0x6ED9EBA1(%rbx,%rsi), %ebx # e += RCONST + mixed_W | ||||
| 	leal	0x6ED9EBA1(%rbx,%rsi), %ebx # e += RCONST + W[n & 15] | ||||
| 	addl	%edi, %ebx		# e += (c ^ d ^ b) | ||||
| 	movl	%ecx, %esi		# | ||||
| 	roll	$5, %esi		# rotl32(a,5) | ||||
| 	addl	%esi, %ebx		# e += rotl32(a,5) | ||||
| 	rorl	$2, %edx		# b = rotl32(b,30) | ||||
| # 24 | ||||
| 	movl	-32+4*5(%rsp), %esi	# W[(n+13) & 15] | ||||
| 	xorl	-32+4*0(%rsp), %esi		# ^W[(n+8) & 15] | ||||
| 	xorl	%r10d, %esi		# ^W[(n+2) & 15] | ||||
| 	xorl	%r8d, %esi		# ^W[n & 15] | ||||
| 	roll	%esi			# | ||||
| 	movl	%esi, %r8d		# store to W[n & 15] | ||||
| 	xorl	-32+4*5(%rsp), %r8d	# W[n & 15] ^= W[(n+13) & 15] | ||||
| 	xorl	-32+4*0(%rsp), %r8d	# ^W[(n+8) & 15] | ||||
| 	xorl	%r10d, %r8d	# ^W[(n+2) & 15] | ||||
| 	roll	%r8d		# | ||||
| 	movl	%edx, %edi		# c | ||||
| 	xorl	%ebp, %edi		# ^d | ||||
| 	xorl	%ecx, %edi		# ^b | ||||
| 	leal	0x6ED9EBA1(%rax,%rsi), %eax # e += RCONST + mixed_W | ||||
| 	leal	0x6ED9EBA1(%rax,%r8), %eax # e += RCONST + W[n & 15] | ||||
| 	addl	%edi, %eax		# e += (c ^ d ^ b) | ||||
| 	movl	%ebx, %esi		# | ||||
| 	roll	$5, %esi		# rotl32(a,5) | ||||
| 	addl	%esi, %eax		# e += rotl32(a,5) | ||||
| 	rorl	$2, %ecx		# b = rotl32(b,30) | ||||
| # 25 | ||||
| 	movl	-32+4*6(%rsp), %esi	# W[(n+13) & 15] | ||||
| 	xorl	-32+4*1(%rsp), %esi		# ^W[(n+8) & 15] | ||||
| 	xorl	%r11d, %esi		# ^W[(n+2) & 15] | ||||
| 	xorl	%r9d, %esi		# ^W[n & 15] | ||||
| 	roll	%esi			# | ||||
| 	movl	%esi, %r9d		# store to W[n & 15] | ||||
| 	xorl	-32+4*6(%rsp), %r9d	# W[n & 15] ^= W[(n+13) & 15] | ||||
| 	xorl	-32+4*1(%rsp), %r9d	# ^W[(n+8) & 15] | ||||
| 	xorl	%r11d, %r9d	# ^W[(n+2) & 15] | ||||
| 	roll	%r9d		# | ||||
| 	movl	%ecx, %edi		# c | ||||
| 	xorl	%edx, %edi		# ^d | ||||
| 	xorl	%ebx, %edi		# ^b | ||||
| 	leal	0x6ED9EBA1(%rbp,%rsi), %ebp # e += RCONST + mixed_W | ||||
| 	leal	0x6ED9EBA1(%rbp,%r9), %ebp # e += RCONST + W[n & 15] | ||||
| 	addl	%edi, %ebp		# e += (c ^ d ^ b) | ||||
| 	movl	%eax, %esi		# | ||||
| 	roll	$5, %esi		# rotl32(a,5) | ||||
| 	addl	%esi, %ebp		# e += rotl32(a,5) | ||||
| 	rorl	$2, %ebx		# b = rotl32(b,30) | ||||
| # 26 | ||||
| 	movl	-32+4*7(%rsp), %esi	# W[(n+13) & 15] | ||||
| 	xorl	-32+4*2(%rsp), %esi		# ^W[(n+8) & 15] | ||||
| 	xorl	%r12d, %esi		# ^W[(n+2) & 15] | ||||
| 	xorl	%r10d, %esi		# ^W[n & 15] | ||||
| 	roll	%esi			# | ||||
| 	movl	%esi, %r10d		# store to W[n & 15] | ||||
| 	xorl	-32+4*7(%rsp), %r10d	# W[n & 15] ^= W[(n+13) & 15] | ||||
| 	xorl	-32+4*2(%rsp), %r10d	# ^W[(n+8) & 15] | ||||
| 	xorl	%r12d, %r10d	# ^W[(n+2) & 15] | ||||
| 	roll	%r10d		# | ||||
| 	movl	%ebx, %edi		# c | ||||
| 	xorl	%ecx, %edi		# ^d | ||||
| 	xorl	%eax, %edi		# ^b | ||||
| 	leal	0x6ED9EBA1(%rdx,%rsi), %edx # e += RCONST + mixed_W | ||||
| 	leal	0x6ED9EBA1(%rdx,%r10), %edx # e += RCONST + W[n & 15] | ||||
| 	addl	%edi, %edx		# e += (c ^ d ^ b) | ||||
| 	movl	%ebp, %esi		# | ||||
| 	roll	$5, %esi		# rotl32(a,5) | ||||
| 	addl	%esi, %edx		# e += rotl32(a,5) | ||||
| 	rorl	$2, %eax		# b = rotl32(b,30) | ||||
| # 27 | ||||
| 	movl	%r8d, %esi	# W[(n+13) & 15] | ||||
| 	xorl	-32+4*3(%rsp), %esi		# ^W[(n+8) & 15] | ||||
| 	xorl	%r13d, %esi		# ^W[(n+2) & 15] | ||||
| 	xorl	%r11d, %esi		# ^W[n & 15] | ||||
| 	roll	%esi			# | ||||
| 	movl	%esi, %r11d		# store to W[n & 15] | ||||
| 	xorl	%r8d, %r11d	# W[n & 15] ^= W[(n+13) & 15] | ||||
| 	xorl	-32+4*3(%rsp), %r11d	# ^W[(n+8) & 15] | ||||
| 	xorl	%r13d, %r11d	# ^W[(n+2) & 15] | ||||
| 	roll	%r11d		# | ||||
| 	movl	%eax, %edi		# c | ||||
| 	xorl	%ebx, %edi		# ^d | ||||
| 	xorl	%ebp, %edi		# ^b | ||||
| 	leal	0x6ED9EBA1(%rcx,%rsi), %ecx # e += RCONST + mixed_W | ||||
| 	leal	0x6ED9EBA1(%rcx,%r11), %ecx # e += RCONST + W[n & 15] | ||||
| 	addl	%edi, %ecx		# e += (c ^ d ^ b) | ||||
| 	movl	%edx, %esi		# | ||||
| 	roll	$5, %esi		# rotl32(a,5) | ||||
| 	addl	%esi, %ecx		# e += rotl32(a,5) | ||||
| 	rorl	$2, %ebp		# b = rotl32(b,30) | ||||
| # 28 | ||||
| 	movl	%r9d, %esi	# W[(n+13) & 15] | ||||
| 	xorl	-32+4*4(%rsp), %esi		# ^W[(n+8) & 15] | ||||
| 	xorl	%r14d, %esi		# ^W[(n+2) & 15] | ||||
| 	xorl	%r12d, %esi		# ^W[n & 15] | ||||
| 	roll	%esi			# | ||||
| 	movl	%esi, %r12d		# store to W[n & 15] | ||||
| 	xorl	%r9d, %r12d	# W[n & 15] ^= W[(n+13) & 15] | ||||
| 	xorl	-32+4*4(%rsp), %r12d	# ^W[(n+8) & 15] | ||||
| 	xorl	%r14d, %r12d	# ^W[(n+2) & 15] | ||||
| 	roll	%r12d		# | ||||
| 	movl	%ebp, %edi		# c | ||||
| 	xorl	%eax, %edi		# ^d | ||||
| 	xorl	%edx, %edi		# ^b | ||||
| 	leal	0x6ED9EBA1(%rbx,%rsi), %ebx # e += RCONST + mixed_W | ||||
| 	leal	0x6ED9EBA1(%rbx,%r12), %ebx # e += RCONST + W[n & 15] | ||||
| 	addl	%edi, %ebx		# e += (c ^ d ^ b) | ||||
| 	movl	%ecx, %esi		# | ||||
| 	roll	$5, %esi		# rotl32(a,5) | ||||
| 	addl	%esi, %ebx		# e += rotl32(a,5) | ||||
| 	rorl	$2, %edx		# b = rotl32(b,30) | ||||
| # 29 | ||||
| 	movl	%r10d, %esi	# W[(n+13) & 15] | ||||
| 	xorl	-32+4*5(%rsp), %esi		# ^W[(n+8) & 15] | ||||
| 	xorl	%r15d, %esi		# ^W[(n+2) & 15] | ||||
| 	xorl	%r13d, %esi		# ^W[n & 15] | ||||
| 	roll	%esi			# | ||||
| 	movl	%esi, %r13d		# store to W[n & 15] | ||||
| 	xorl	%r10d, %r13d	# W[n & 15] ^= W[(n+13) & 15] | ||||
| 	xorl	-32+4*5(%rsp), %r13d	# ^W[(n+8) & 15] | ||||
| 	xorl	%r15d, %r13d	# ^W[(n+2) & 15] | ||||
| 	roll	%r13d		# | ||||
| 	movl	%edx, %edi		# c | ||||
| 	xorl	%ebp, %edi		# ^d | ||||
| 	xorl	%ecx, %edi		# ^b | ||||
| 	leal	0x6ED9EBA1(%rax,%rsi), %eax # e += RCONST + mixed_W | ||||
| 	leal	0x6ED9EBA1(%rax,%r13), %eax # e += RCONST + W[n & 15] | ||||
| 	addl	%edi, %eax		# e += (c ^ d ^ b) | ||||
| 	movl	%ebx, %esi		# | ||||
| 	roll	$5, %esi		# rotl32(a,5) | ||||
| 	addl	%esi, %eax		# e += rotl32(a,5) | ||||
| 	rorl	$2, %ecx		# b = rotl32(b,30) | ||||
| # 30 | ||||
| 	movl	%r11d, %esi	# W[(n+13) & 15] | ||||
| 	xorl	-32+4*6(%rsp), %esi		# ^W[(n+8) & 15] | ||||
| 	xorl	-32+4*0(%rsp), %esi		# ^W[(n+2) & 15] | ||||
| 	xorl	%r14d, %esi		# ^W[n & 15] | ||||
| 	roll	%esi			# | ||||
| 	movl	%esi, %r14d		# store to W[n & 15] | ||||
| 	xorl	%r11d, %r14d	# W[n & 15] ^= W[(n+13) & 15] | ||||
| 	xorl	-32+4*6(%rsp), %r14d	# ^W[(n+8) & 15] | ||||
| 	xorl	-32+4*0(%rsp), %r14d	# ^W[(n+2) & 15] | ||||
| 	roll	%r14d		# | ||||
| 	movl	%ecx, %edi		# c | ||||
| 	xorl	%edx, %edi		# ^d | ||||
| 	xorl	%ebx, %edi		# ^b | ||||
| 	leal	0x6ED9EBA1(%rbp,%rsi), %ebp # e += RCONST + mixed_W | ||||
| 	leal	0x6ED9EBA1(%rbp,%r14), %ebp # e += RCONST + W[n & 15] | ||||
| 	addl	%edi, %ebp		# e += (c ^ d ^ b) | ||||
| 	movl	%eax, %esi		# | ||||
| 	roll	$5, %esi		# rotl32(a,5) | ||||
| 	addl	%esi, %ebp		# e += rotl32(a,5) | ||||
| 	rorl	$2, %ebx		# b = rotl32(b,30) | ||||
| # 31 | ||||
| 	movl	%r12d, %esi	# W[(n+13) & 15] | ||||
| 	xorl	-32+4*7(%rsp), %esi		# ^W[(n+8) & 15] | ||||
| 	xorl	-32+4*1(%rsp), %esi		# ^W[(n+2) & 15] | ||||
| 	xorl	%r15d, %esi		# ^W[n & 15] | ||||
| 	roll	%esi			# | ||||
| 	movl	%esi, %r15d		# store to W[n & 15] | ||||
| 	xorl	%r12d, %r15d	# W[n & 15] ^= W[(n+13) & 15] | ||||
| 	xorl	-32+4*7(%rsp), %r15d	# ^W[(n+8) & 15] | ||||
| 	xorl	-32+4*1(%rsp), %r15d	# ^W[(n+2) & 15] | ||||
| 	roll	%r15d		# | ||||
| 	movl	%ebx, %edi		# c | ||||
| 	xorl	%ecx, %edi		# ^d | ||||
| 	xorl	%eax, %edi		# ^b | ||||
| 	leal	0x6ED9EBA1(%rdx,%rsi), %edx # e += RCONST + mixed_W | ||||
| 	leal	0x6ED9EBA1(%rdx,%r15), %edx # e += RCONST + W[n & 15] | ||||
| 	addl	%edi, %edx		# e += (c ^ d ^ b) | ||||
| 	movl	%ebp, %esi		# | ||||
| 	roll	$5, %esi		# rotl32(a,5) | ||||
| @@ -512,7 +500,7 @@ sha1_process_block64: | ||||
| 	movl	%eax, %edi		# c | ||||
| 	xorl	%ebx, %edi		# ^d | ||||
| 	xorl	%ebp, %edi		# ^b | ||||
| 	leal	0x6ED9EBA1(%rcx,%rsi), %ecx # e += RCONST + mixed_W | ||||
| 	leal	0x6ED9EBA1(%rcx,%rsi), %ecx # e += RCONST + W[n & 15] | ||||
| 	addl	%edi, %ecx		# e += (c ^ d ^ b) | ||||
| 	movl	%edx, %esi		# | ||||
| 	roll	$5, %esi		# rotl32(a,5) | ||||
| @@ -528,7 +516,7 @@ sha1_process_block64: | ||||
| 	movl	%ebp, %edi		# c | ||||
| 	xorl	%eax, %edi		# ^d | ||||
| 	xorl	%edx, %edi		# ^b | ||||
| 	leal	0x6ED9EBA1(%rbx,%rsi), %ebx # e += RCONST + mixed_W | ||||
| 	leal	0x6ED9EBA1(%rbx,%rsi), %ebx # e += RCONST + W[n & 15] | ||||
| 	addl	%edi, %ebx		# e += (c ^ d ^ b) | ||||
| 	movl	%ecx, %esi		# | ||||
| 	roll	$5, %esi		# rotl32(a,5) | ||||
| @@ -544,7 +532,7 @@ sha1_process_block64: | ||||
| 	movl	%edx, %edi		# c | ||||
| 	xorl	%ebp, %edi		# ^d | ||||
| 	xorl	%ecx, %edi		# ^b | ||||
| 	leal	0x6ED9EBA1(%rax,%rsi), %eax # e += RCONST + mixed_W | ||||
| 	leal	0x6ED9EBA1(%rax,%rsi), %eax # e += RCONST + W[n & 15] | ||||
| 	addl	%edi, %eax		# e += (c ^ d ^ b) | ||||
| 	movl	%ebx, %esi		# | ||||
| 	roll	$5, %esi		# rotl32(a,5) | ||||
| @@ -560,7 +548,7 @@ sha1_process_block64: | ||||
| 	movl	%ecx, %edi		# c | ||||
| 	xorl	%edx, %edi		# ^d | ||||
| 	xorl	%ebx, %edi		# ^b | ||||
| 	leal	0x6ED9EBA1(%rbp,%rsi), %ebp # e += RCONST + mixed_W | ||||
| 	leal	0x6ED9EBA1(%rbp,%rsi), %ebp # e += RCONST + W[n & 15] | ||||
| 	addl	%edi, %ebp		# e += (c ^ d ^ b) | ||||
| 	movl	%eax, %esi		# | ||||
| 	roll	$5, %esi		# rotl32(a,5) | ||||
| @@ -576,7 +564,7 @@ sha1_process_block64: | ||||
| 	movl	%ebx, %edi		# c | ||||
| 	xorl	%ecx, %edi		# ^d | ||||
| 	xorl	%eax, %edi		# ^b | ||||
| 	leal	0x6ED9EBA1(%rdx,%rsi), %edx # e += RCONST + mixed_W | ||||
| 	leal	0x6ED9EBA1(%rdx,%rsi), %edx # e += RCONST + W[n & 15] | ||||
| 	addl	%edi, %edx		# e += (c ^ d ^ b) | ||||
| 	movl	%ebp, %esi		# | ||||
| 	roll	$5, %esi		# rotl32(a,5) | ||||
| @@ -592,7 +580,7 @@ sha1_process_block64: | ||||
| 	movl	%eax, %edi		# c | ||||
| 	xorl	%ebx, %edi		# ^d | ||||
| 	xorl	%ebp, %edi		# ^b | ||||
| 	leal	0x6ED9EBA1(%rcx,%rsi), %ecx # e += RCONST + mixed_W | ||||
| 	leal	0x6ED9EBA1(%rcx,%rsi), %ecx # e += RCONST + W[n & 15] | ||||
| 	addl	%edi, %ecx		# e += (c ^ d ^ b) | ||||
| 	movl	%edx, %esi		# | ||||
| 	roll	$5, %esi		# rotl32(a,5) | ||||
| @@ -608,7 +596,7 @@ sha1_process_block64: | ||||
| 	movl	%ebp, %edi		# c | ||||
| 	xorl	%eax, %edi		# ^d | ||||
| 	xorl	%edx, %edi		# ^b | ||||
| 	leal	0x6ED9EBA1(%rbx,%rsi), %ebx # e += RCONST + mixed_W | ||||
| 	leal	0x6ED9EBA1(%rbx,%rsi), %ebx # e += RCONST + W[n & 15] | ||||
| 	addl	%edi, %ebx		# e += (c ^ d ^ b) | ||||
| 	movl	%ecx, %esi		# | ||||
| 	roll	$5, %esi		# rotl32(a,5) | ||||
| @@ -624,7 +612,7 @@ sha1_process_block64: | ||||
| 	movl	%edx, %edi		# c | ||||
| 	xorl	%ebp, %edi		# ^d | ||||
| 	xorl	%ecx, %edi		# ^b | ||||
| 	leal	0x6ED9EBA1(%rax,%rsi), %eax # e += RCONST + mixed_W | ||||
| 	leal	0x6ED9EBA1(%rax,%rsi), %eax # e += RCONST + W[n & 15] | ||||
| 	addl	%edi, %eax		# e += (c ^ d ^ b) | ||||
| 	movl	%ebx, %esi		# | ||||
| 	roll	$5, %esi		# rotl32(a,5) | ||||
| @@ -637,14 +625,12 @@ sha1_process_block64: | ||||
| 	andl	%ecx, %esi		# si: b & c | ||||
| 	andl	%edx, %edi		# di: (b | c) & d | ||||
| 	orl	%esi, %edi		# ((b | c) & d) | (b & c) | ||||
| 	movl	-32+4*5(%rsp), %esi	# W[(n+13) & 15] | ||||
| 	xorl	-32+4*0(%rsp), %esi		# ^W[(n+8) & 15] | ||||
| 	xorl	%r10d, %esi		# ^W[(n+2) & 15] | ||||
| 	xorl	%r8d, %esi		# ^W[n & 15] | ||||
| 	roll	%esi			# | ||||
| 	movl	%esi, %r8d		# store to W[n & 15] | ||||
| 	xorl	-32+4*5(%rsp), %r8d	# W[n & 15] ^= W[(n+13) & 15] | ||||
| 	xorl	-32+4*0(%rsp), %r8d	# ^W[(n+8) & 15] | ||||
| 	xorl	%r10d, %r8d	# ^W[(n+2) & 15] | ||||
| 	roll	%r8d		# | ||||
| 	addl	%edi, %ebp		# += ((b | c) & d) | (b & c) | ||||
| 	leal	-0x70e44324(%rbp,%rsi), %ebp # e += RCONST + mixed_W | ||||
| 	leal	-0x70E44324(%rbp,%r8), %ebp # e += RCONST + W[n & 15] | ||||
| 	movl	%eax, %esi		# | ||||
| 	roll	$5, %esi		# rotl32(a,5) | ||||
| 	addl	%esi, %ebp		# e += rotl32(a,5) | ||||
| @@ -656,14 +642,12 @@ sha1_process_block64: | ||||
| 	andl	%ebx, %esi		# si: b & c | ||||
| 	andl	%ecx, %edi		# di: (b | c) & d | ||||
| 	orl	%esi, %edi		# ((b | c) & d) | (b & c) | ||||
| 	movl	-32+4*6(%rsp), %esi	# W[(n+13) & 15] | ||||
| 	xorl	-32+4*1(%rsp), %esi		# ^W[(n+8) & 15] | ||||
| 	xorl	%r11d, %esi		# ^W[(n+2) & 15] | ||||
| 	xorl	%r9d, %esi		# ^W[n & 15] | ||||
| 	roll	%esi			# | ||||
| 	movl	%esi, %r9d		# store to W[n & 15] | ||||
| 	xorl	-32+4*6(%rsp), %r9d	# W[n & 15] ^= W[(n+13) & 15] | ||||
| 	xorl	-32+4*1(%rsp), %r9d	# ^W[(n+8) & 15] | ||||
| 	xorl	%r11d, %r9d	# ^W[(n+2) & 15] | ||||
| 	roll	%r9d		# | ||||
| 	addl	%edi, %edx		# += ((b | c) & d) | (b & c) | ||||
| 	leal	-0x70e44324(%rdx,%rsi), %edx # e += RCONST + mixed_W | ||||
| 	leal	-0x70E44324(%rdx,%r9), %edx # e += RCONST + W[n & 15] | ||||
| 	movl	%ebp, %esi		# | ||||
| 	roll	$5, %esi		# rotl32(a,5) | ||||
| 	addl	%esi, %edx		# e += rotl32(a,5) | ||||
| @@ -675,14 +659,12 @@ sha1_process_block64: | ||||
| 	andl	%eax, %esi		# si: b & c | ||||
| 	andl	%ebx, %edi		# di: (b | c) & d | ||||
| 	orl	%esi, %edi		# ((b | c) & d) | (b & c) | ||||
| 	movl	-32+4*7(%rsp), %esi	# W[(n+13) & 15] | ||||
| 	xorl	-32+4*2(%rsp), %esi		# ^W[(n+8) & 15] | ||||
| 	xorl	%r12d, %esi		# ^W[(n+2) & 15] | ||||
| 	xorl	%r10d, %esi		# ^W[n & 15] | ||||
| 	roll	%esi			# | ||||
| 	movl	%esi, %r10d		# store to W[n & 15] | ||||
| 	xorl	-32+4*7(%rsp), %r10d	# W[n & 15] ^= W[(n+13) & 15] | ||||
| 	xorl	-32+4*2(%rsp), %r10d	# ^W[(n+8) & 15] | ||||
| 	xorl	%r12d, %r10d	# ^W[(n+2) & 15] | ||||
| 	roll	%r10d		# | ||||
| 	addl	%edi, %ecx		# += ((b | c) & d) | (b & c) | ||||
| 	leal	-0x70e44324(%rcx,%rsi), %ecx # e += RCONST + mixed_W | ||||
| 	leal	-0x70E44324(%rcx,%r10), %ecx # e += RCONST + W[n & 15] | ||||
| 	movl	%edx, %esi		# | ||||
| 	roll	$5, %esi		# rotl32(a,5) | ||||
| 	addl	%esi, %ecx		# e += rotl32(a,5) | ||||
| @@ -694,14 +676,12 @@ sha1_process_block64: | ||||
| 	andl	%ebp, %esi		# si: b & c | ||||
| 	andl	%eax, %edi		# di: (b | c) & d | ||||
| 	orl	%esi, %edi		# ((b | c) & d) | (b & c) | ||||
| 	movl	%r8d, %esi	# W[(n+13) & 15] | ||||
| 	xorl	-32+4*3(%rsp), %esi		# ^W[(n+8) & 15] | ||||
| 	xorl	%r13d, %esi		# ^W[(n+2) & 15] | ||||
| 	xorl	%r11d, %esi		# ^W[n & 15] | ||||
| 	roll	%esi			# | ||||
| 	movl	%esi, %r11d		# store to W[n & 15] | ||||
| 	xorl	%r8d, %r11d	# W[n & 15] ^= W[(n+13) & 15] | ||||
| 	xorl	-32+4*3(%rsp), %r11d	# ^W[(n+8) & 15] | ||||
| 	xorl	%r13d, %r11d	# ^W[(n+2) & 15] | ||||
| 	roll	%r11d		# | ||||
| 	addl	%edi, %ebx		# += ((b | c) & d) | (b & c) | ||||
| 	leal	-0x70e44324(%rbx,%rsi), %ebx # e += RCONST + mixed_W | ||||
| 	leal	-0x70E44324(%rbx,%r11), %ebx # e += RCONST + W[n & 15] | ||||
| 	movl	%ecx, %esi		# | ||||
| 	roll	$5, %esi		# rotl32(a,5) | ||||
| 	addl	%esi, %ebx		# e += rotl32(a,5) | ||||
| @@ -713,14 +693,12 @@ sha1_process_block64: | ||||
| 	andl	%edx, %esi		# si: b & c | ||||
| 	andl	%ebp, %edi		# di: (b | c) & d | ||||
| 	orl	%esi, %edi		# ((b | c) & d) | (b & c) | ||||
| 	movl	%r9d, %esi	# W[(n+13) & 15] | ||||
| 	xorl	-32+4*4(%rsp), %esi		# ^W[(n+8) & 15] | ||||
| 	xorl	%r14d, %esi		# ^W[(n+2) & 15] | ||||
| 	xorl	%r12d, %esi		# ^W[n & 15] | ||||
| 	roll	%esi			# | ||||
| 	movl	%esi, %r12d		# store to W[n & 15] | ||||
| 	xorl	%r9d, %r12d	# W[n & 15] ^= W[(n+13) & 15] | ||||
| 	xorl	-32+4*4(%rsp), %r12d	# ^W[(n+8) & 15] | ||||
| 	xorl	%r14d, %r12d	# ^W[(n+2) & 15] | ||||
| 	roll	%r12d		# | ||||
| 	addl	%edi, %eax		# += ((b | c) & d) | (b & c) | ||||
| 	leal	-0x70e44324(%rax,%rsi), %eax # e += RCONST + mixed_W | ||||
| 	leal	-0x70E44324(%rax,%r12), %eax # e += RCONST + W[n & 15] | ||||
| 	movl	%ebx, %esi		# | ||||
| 	roll	$5, %esi		# rotl32(a,5) | ||||
| 	addl	%esi, %eax		# e += rotl32(a,5) | ||||
| @@ -732,14 +710,12 @@ sha1_process_block64: | ||||
| 	andl	%ecx, %esi		# si: b & c | ||||
| 	andl	%edx, %edi		# di: (b | c) & d | ||||
| 	orl	%esi, %edi		# ((b | c) & d) | (b & c) | ||||
| 	movl	%r10d, %esi	# W[(n+13) & 15] | ||||
| 	xorl	-32+4*5(%rsp), %esi		# ^W[(n+8) & 15] | ||||
| 	xorl	%r15d, %esi		# ^W[(n+2) & 15] | ||||
| 	xorl	%r13d, %esi		# ^W[n & 15] | ||||
| 	roll	%esi			# | ||||
| 	movl	%esi, %r13d		# store to W[n & 15] | ||||
| 	xorl	%r10d, %r13d	# W[n & 15] ^= W[(n+13) & 15] | ||||
| 	xorl	-32+4*5(%rsp), %r13d	# ^W[(n+8) & 15] | ||||
| 	xorl	%r15d, %r13d	# ^W[(n+2) & 15] | ||||
| 	roll	%r13d		# | ||||
| 	addl	%edi, %ebp		# += ((b | c) & d) | (b & c) | ||||
| 	leal	-0x70e44324(%rbp,%rsi), %ebp # e += RCONST + mixed_W | ||||
| 	leal	-0x70E44324(%rbp,%r13), %ebp # e += RCONST + W[n & 15] | ||||
| 	movl	%eax, %esi		# | ||||
| 	roll	$5, %esi		# rotl32(a,5) | ||||
| 	addl	%esi, %ebp		# e += rotl32(a,5) | ||||
| @@ -751,14 +727,12 @@ sha1_process_block64: | ||||
| 	andl	%ebx, %esi		# si: b & c | ||||
| 	andl	%ecx, %edi		# di: (b | c) & d | ||||
| 	orl	%esi, %edi		# ((b | c) & d) | (b & c) | ||||
| 	movl	%r11d, %esi	# W[(n+13) & 15] | ||||
| 	xorl	-32+4*6(%rsp), %esi		# ^W[(n+8) & 15] | ||||
| 	xorl	-32+4*0(%rsp), %esi		# ^W[(n+2) & 15] | ||||
| 	xorl	%r14d, %esi		# ^W[n & 15] | ||||
| 	roll	%esi			# | ||||
| 	movl	%esi, %r14d		# store to W[n & 15] | ||||
| 	xorl	%r11d, %r14d	# W[n & 15] ^= W[(n+13) & 15] | ||||
| 	xorl	-32+4*6(%rsp), %r14d	# ^W[(n+8) & 15] | ||||
| 	xorl	-32+4*0(%rsp), %r14d	# ^W[(n+2) & 15] | ||||
| 	roll	%r14d		# | ||||
| 	addl	%edi, %edx		# += ((b | c) & d) | (b & c) | ||||
| 	leal	-0x70e44324(%rdx,%rsi), %edx # e += RCONST + mixed_W | ||||
| 	leal	-0x70E44324(%rdx,%r14), %edx # e += RCONST + W[n & 15] | ||||
| 	movl	%ebp, %esi		# | ||||
| 	roll	$5, %esi		# rotl32(a,5) | ||||
| 	addl	%esi, %edx		# e += rotl32(a,5) | ||||
| @@ -770,14 +744,12 @@ sha1_process_block64: | ||||
| 	andl	%eax, %esi		# si: b & c | ||||
| 	andl	%ebx, %edi		# di: (b | c) & d | ||||
| 	orl	%esi, %edi		# ((b | c) & d) | (b & c) | ||||
| 	movl	%r12d, %esi	# W[(n+13) & 15] | ||||
| 	xorl	-32+4*7(%rsp), %esi		# ^W[(n+8) & 15] | ||||
| 	xorl	-32+4*1(%rsp), %esi		# ^W[(n+2) & 15] | ||||
| 	xorl	%r15d, %esi		# ^W[n & 15] | ||||
| 	roll	%esi			# | ||||
| 	movl	%esi, %r15d		# store to W[n & 15] | ||||
| 	xorl	%r12d, %r15d	# W[n & 15] ^= W[(n+13) & 15] | ||||
| 	xorl	-32+4*7(%rsp), %r15d	# ^W[(n+8) & 15] | ||||
| 	xorl	-32+4*1(%rsp), %r15d	# ^W[(n+2) & 15] | ||||
| 	roll	%r15d		# | ||||
| 	addl	%edi, %ecx		# += ((b | c) & d) | (b & c) | ||||
| 	leal	-0x70e44324(%rcx,%rsi), %ecx # e += RCONST + mixed_W | ||||
| 	leal	-0x70E44324(%rcx,%r15), %ecx # e += RCONST + W[n & 15] | ||||
| 	movl	%edx, %esi		# | ||||
| 	roll	$5, %esi		# rotl32(a,5) | ||||
| 	addl	%esi, %ecx		# e += rotl32(a,5) | ||||
| @@ -796,7 +768,7 @@ sha1_process_block64: | ||||
| 	roll	%esi			# | ||||
| 	movl	%esi, -32+4*0(%rsp)		# store to W[n & 15] | ||||
| 	addl	%edi, %ebx		# += ((b | c) & d) | (b & c) | ||||
| 	leal	-0x70e44324(%rbx,%rsi), %ebx # e += RCONST + mixed_W | ||||
| 	leal	-0x70E44324(%rbx,%rsi), %ebx # e += RCONST + W[n & 15] | ||||
| 	movl	%ecx, %esi		# | ||||
| 	roll	$5, %esi		# rotl32(a,5) | ||||
| 	addl	%esi, %ebx		# e += rotl32(a,5) | ||||
| @@ -815,7 +787,7 @@ sha1_process_block64: | ||||
| 	roll	%esi			# | ||||
| 	movl	%esi, -32+4*1(%rsp)		# store to W[n & 15] | ||||
| 	addl	%edi, %eax		# += ((b | c) & d) | (b & c) | ||||
| 	leal	-0x70e44324(%rax,%rsi), %eax # e += RCONST + mixed_W | ||||
| 	leal	-0x70E44324(%rax,%rsi), %eax # e += RCONST + W[n & 15] | ||||
| 	movl	%ebx, %esi		# | ||||
| 	roll	$5, %esi		# rotl32(a,5) | ||||
| 	addl	%esi, %eax		# e += rotl32(a,5) | ||||
| @@ -834,7 +806,7 @@ sha1_process_block64: | ||||
| 	roll	%esi			# | ||||
| 	movl	%esi, -32+4*2(%rsp)		# store to W[n & 15] | ||||
| 	addl	%edi, %ebp		# += ((b | c) & d) | (b & c) | ||||
| 	leal	-0x70e44324(%rbp,%rsi), %ebp # e += RCONST + mixed_W | ||||
| 	leal	-0x70E44324(%rbp,%rsi), %ebp # e += RCONST + W[n & 15] | ||||
| 	movl	%eax, %esi		# | ||||
| 	roll	$5, %esi		# rotl32(a,5) | ||||
| 	addl	%esi, %ebp		# e += rotl32(a,5) | ||||
| @@ -853,7 +825,7 @@ sha1_process_block64: | ||||
| 	roll	%esi			# | ||||
| 	movl	%esi, -32+4*3(%rsp)		# store to W[n & 15] | ||||
| 	addl	%edi, %edx		# += ((b | c) & d) | (b & c) | ||||
| 	leal	-0x70e44324(%rdx,%rsi), %edx # e += RCONST + mixed_W | ||||
| 	leal	-0x70E44324(%rdx,%rsi), %edx # e += RCONST + W[n & 15] | ||||
| 	movl	%ebp, %esi		# | ||||
| 	roll	$5, %esi		# rotl32(a,5) | ||||
| 	addl	%esi, %edx		# e += rotl32(a,5) | ||||
| @@ -872,7 +844,7 @@ sha1_process_block64: | ||||
| 	roll	%esi			# | ||||
| 	movl	%esi, -32+4*4(%rsp)		# store to W[n & 15] | ||||
| 	addl	%edi, %ecx		# += ((b | c) & d) | (b & c) | ||||
| 	leal	-0x70e44324(%rcx,%rsi), %ecx # e += RCONST + mixed_W | ||||
| 	leal	-0x70E44324(%rcx,%rsi), %ecx # e += RCONST + W[n & 15] | ||||
| 	movl	%edx, %esi		# | ||||
| 	roll	$5, %esi		# rotl32(a,5) | ||||
| 	addl	%esi, %ecx		# e += rotl32(a,5) | ||||
| @@ -891,7 +863,7 @@ sha1_process_block64: | ||||
| 	roll	%esi			# | ||||
| 	movl	%esi, -32+4*5(%rsp)		# store to W[n & 15] | ||||
| 	addl	%edi, %ebx		# += ((b | c) & d) | (b & c) | ||||
| 	leal	-0x70e44324(%rbx,%rsi), %ebx # e += RCONST + mixed_W | ||||
| 	leal	-0x70E44324(%rbx,%rsi), %ebx # e += RCONST + W[n & 15] | ||||
| 	movl	%ecx, %esi		# | ||||
| 	roll	$5, %esi		# rotl32(a,5) | ||||
| 	addl	%esi, %ebx		# e += rotl32(a,5) | ||||
| @@ -910,7 +882,7 @@ sha1_process_block64: | ||||
| 	roll	%esi			# | ||||
| 	movl	%esi, -32+4*6(%rsp)		# store to W[n & 15] | ||||
| 	addl	%edi, %eax		# += ((b | c) & d) | (b & c) | ||||
| 	leal	-0x70e44324(%rax,%rsi), %eax # e += RCONST + mixed_W | ||||
| 	leal	-0x70E44324(%rax,%rsi), %eax # e += RCONST + W[n & 15] | ||||
| 	movl	%ebx, %esi		# | ||||
| 	roll	$5, %esi		# rotl32(a,5) | ||||
| 	addl	%esi, %eax		# e += rotl32(a,5) | ||||
| @@ -929,7 +901,7 @@ sha1_process_block64: | ||||
| 	roll	%esi			# | ||||
| 	movl	%esi, -32+4*7(%rsp)		# store to W[n & 15] | ||||
| 	addl	%edi, %ebp		# += ((b | c) & d) | (b & c) | ||||
| 	leal	-0x70e44324(%rbp,%rsi), %ebp # e += RCONST + mixed_W | ||||
| 	leal	-0x70E44324(%rbp,%rsi), %ebp # e += RCONST + W[n & 15] | ||||
| 	movl	%eax, %esi		# | ||||
| 	roll	$5, %esi		# rotl32(a,5) | ||||
| 	addl	%esi, %ebp		# e += rotl32(a,5) | ||||
| @@ -941,14 +913,12 @@ sha1_process_block64: | ||||
| 	andl	%ebx, %esi		# si: b & c | ||||
| 	andl	%ecx, %edi		# di: (b | c) & d | ||||
| 	orl	%esi, %edi		# ((b | c) & d) | (b & c) | ||||
| 	movl	-32+4*5(%rsp), %esi	# W[(n+13) & 15] | ||||
| 	xorl	-32+4*0(%rsp), %esi		# ^W[(n+8) & 15] | ||||
| 	xorl	%r10d, %esi		# ^W[(n+2) & 15] | ||||
| 	xorl	%r8d, %esi		# ^W[n & 15] | ||||
| 	roll	%esi			# | ||||
| 	movl	%esi, %r8d		# store to W[n & 15] | ||||
| 	xorl	-32+4*5(%rsp), %r8d	# W[n & 15] ^= W[(n+13) & 15] | ||||
| 	xorl	-32+4*0(%rsp), %r8d	# ^W[(n+8) & 15] | ||||
| 	xorl	%r10d, %r8d	# ^W[(n+2) & 15] | ||||
| 	roll	%r8d		# | ||||
| 	addl	%edi, %edx		# += ((b | c) & d) | (b & c) | ||||
| 	leal	-0x70e44324(%rdx,%rsi), %edx # e += RCONST + mixed_W | ||||
| 	leal	-0x70E44324(%rdx,%r8), %edx # e += RCONST + W[n & 15] | ||||
| 	movl	%ebp, %esi		# | ||||
| 	roll	$5, %esi		# rotl32(a,5) | ||||
| 	addl	%esi, %edx		# e += rotl32(a,5) | ||||
| @@ -960,14 +930,12 @@ sha1_process_block64: | ||||
| 	andl	%eax, %esi		# si: b & c | ||||
| 	andl	%ebx, %edi		# di: (b | c) & d | ||||
| 	orl	%esi, %edi		# ((b | c) & d) | (b & c) | ||||
| 	movl	-32+4*6(%rsp), %esi	# W[(n+13) & 15] | ||||
| 	xorl	-32+4*1(%rsp), %esi		# ^W[(n+8) & 15] | ||||
| 	xorl	%r11d, %esi		# ^W[(n+2) & 15] | ||||
| 	xorl	%r9d, %esi		# ^W[n & 15] | ||||
| 	roll	%esi			# | ||||
| 	movl	%esi, %r9d		# store to W[n & 15] | ||||
| 	xorl	-32+4*6(%rsp), %r9d	# W[n & 15] ^= W[(n+13) & 15] | ||||
| 	xorl	-32+4*1(%rsp), %r9d	# ^W[(n+8) & 15] | ||||
| 	xorl	%r11d, %r9d	# ^W[(n+2) & 15] | ||||
| 	roll	%r9d		# | ||||
| 	addl	%edi, %ecx		# += ((b | c) & d) | (b & c) | ||||
| 	leal	-0x70e44324(%rcx,%rsi), %ecx # e += RCONST + mixed_W | ||||
| 	leal	-0x70E44324(%rcx,%r9), %ecx # e += RCONST + W[n & 15] | ||||
| 	movl	%edx, %esi		# | ||||
| 	roll	$5, %esi		# rotl32(a,5) | ||||
| 	addl	%esi, %ecx		# e += rotl32(a,5) | ||||
| @@ -979,14 +947,12 @@ sha1_process_block64: | ||||
| 	andl	%ebp, %esi		# si: b & c | ||||
| 	andl	%eax, %edi		# di: (b | c) & d | ||||
| 	orl	%esi, %edi		# ((b | c) & d) | (b & c) | ||||
| 	movl	-32+4*7(%rsp), %esi	# W[(n+13) & 15] | ||||
| 	xorl	-32+4*2(%rsp), %esi		# ^W[(n+8) & 15] | ||||
| 	xorl	%r12d, %esi		# ^W[(n+2) & 15] | ||||
| 	xorl	%r10d, %esi		# ^W[n & 15] | ||||
| 	roll	%esi			# | ||||
| 	movl	%esi, %r10d		# store to W[n & 15] | ||||
| 	xorl	-32+4*7(%rsp), %r10d	# W[n & 15] ^= W[(n+13) & 15] | ||||
| 	xorl	-32+4*2(%rsp), %r10d	# ^W[(n+8) & 15] | ||||
| 	xorl	%r12d, %r10d	# ^W[(n+2) & 15] | ||||
| 	roll	%r10d		# | ||||
| 	addl	%edi, %ebx		# += ((b | c) & d) | (b & c) | ||||
| 	leal	-0x70e44324(%rbx,%rsi), %ebx # e += RCONST + mixed_W | ||||
| 	leal	-0x70E44324(%rbx,%r10), %ebx # e += RCONST + W[n & 15] | ||||
| 	movl	%ecx, %esi		# | ||||
| 	roll	$5, %esi		# rotl32(a,5) | ||||
| 	addl	%esi, %ebx		# e += rotl32(a,5) | ||||
| @@ -998,77 +964,67 @@ sha1_process_block64: | ||||
| 	andl	%edx, %esi		# si: b & c | ||||
| 	andl	%ebp, %edi		# di: (b | c) & d | ||||
| 	orl	%esi, %edi		# ((b | c) & d) | (b & c) | ||||
| 	movl	%r8d, %esi	# W[(n+13) & 15] | ||||
| 	xorl	-32+4*3(%rsp), %esi		# ^W[(n+8) & 15] | ||||
| 	xorl	%r13d, %esi		# ^W[(n+2) & 15] | ||||
| 	xorl	%r11d, %esi		# ^W[n & 15] | ||||
| 	roll	%esi			# | ||||
| 	movl	%esi, %r11d		# store to W[n & 15] | ||||
| 	xorl	%r8d, %r11d	# W[n & 15] ^= W[(n+13) & 15] | ||||
| 	xorl	-32+4*3(%rsp), %r11d	# ^W[(n+8) & 15] | ||||
| 	xorl	%r13d, %r11d	# ^W[(n+2) & 15] | ||||
| 	roll	%r11d		# | ||||
| 	addl	%edi, %eax		# += ((b | c) & d) | (b & c) | ||||
| 	leal	-0x70e44324(%rax,%rsi), %eax # e += RCONST + mixed_W | ||||
| 	leal	-0x70E44324(%rax,%r11), %eax # e += RCONST + W[n & 15] | ||||
| 	movl	%ebx, %esi		# | ||||
| 	roll	$5, %esi		# rotl32(a,5) | ||||
| 	addl	%esi, %eax		# e += rotl32(a,5) | ||||
| 	rorl	$2, %ecx		# b = rotl32(b,30) | ||||
| # 60 | ||||
| 	movl	%r9d, %esi	# W[(n+13) & 15] | ||||
| 	xorl	-32+4*4(%rsp), %esi		# ^W[(n+8) & 15] | ||||
| 	xorl	%r14d, %esi		# ^W[(n+2) & 15] | ||||
| 	xorl	%r12d, %esi		# ^W[n & 15] | ||||
| 	roll	%esi			# | ||||
| 	movl	%esi, %r12d		# store to W[n & 15] | ||||
| 	xorl	%r9d, %r12d	# W[n & 15] ^= W[(n+13) & 15] | ||||
| 	xorl	-32+4*4(%rsp), %r12d	# ^W[(n+8) & 15] | ||||
| 	xorl	%r14d, %r12d	# ^W[(n+2) & 15] | ||||
| 	roll	%r12d		# | ||||
| 	movl	%ecx, %edi		# c | ||||
| 	xorl	%edx, %edi		# ^d | ||||
| 	xorl	%ebx, %edi		# ^b | ||||
| 	leal	-0x359d3e2a(%rbp,%rsi), %ebp # e += RCONST + mixed_W | ||||
| 	leal	-0x359D3E2A(%rbp,%r12), %ebp # e += RCONST + W[n & 15] | ||||
| 	addl	%edi, %ebp		# e += (c ^ d ^ b) | ||||
| 	movl	%eax, %esi		# | ||||
| 	roll	$5, %esi		# rotl32(a,5) | ||||
| 	addl	%esi, %ebp		# e += rotl32(a,5) | ||||
| 	rorl	$2, %ebx		# b = rotl32(b,30) | ||||
| # 61 | ||||
| 	movl	%r10d, %esi	# W[(n+13) & 15] | ||||
| 	xorl	-32+4*5(%rsp), %esi		# ^W[(n+8) & 15] | ||||
| 	xorl	%r15d, %esi		# ^W[(n+2) & 15] | ||||
| 	xorl	%r13d, %esi		# ^W[n & 15] | ||||
| 	roll	%esi			# | ||||
| 	movl	%esi, %r13d		# store to W[n & 15] | ||||
| 	xorl	%r10d, %r13d	# W[n & 15] ^= W[(n+13) & 15] | ||||
| 	xorl	-32+4*5(%rsp), %r13d	# ^W[(n+8) & 15] | ||||
| 	xorl	%r15d, %r13d	# ^W[(n+2) & 15] | ||||
| 	roll	%r13d		# | ||||
| 	movl	%ebx, %edi		# c | ||||
| 	xorl	%ecx, %edi		# ^d | ||||
| 	xorl	%eax, %edi		# ^b | ||||
| 	leal	-0x359d3e2a(%rdx,%rsi), %edx # e += RCONST + mixed_W | ||||
| 	leal	-0x359D3E2A(%rdx,%r13), %edx # e += RCONST + W[n & 15] | ||||
| 	addl	%edi, %edx		# e += (c ^ d ^ b) | ||||
| 	movl	%ebp, %esi		# | ||||
| 	roll	$5, %esi		# rotl32(a,5) | ||||
| 	addl	%esi, %edx		# e += rotl32(a,5) | ||||
| 	rorl	$2, %eax		# b = rotl32(b,30) | ||||
| # 62 | ||||
| 	movl	%r11d, %esi	# W[(n+13) & 15] | ||||
| 	xorl	-32+4*6(%rsp), %esi		# ^W[(n+8) & 15] | ||||
| 	xorl	-32+4*0(%rsp), %esi		# ^W[(n+2) & 15] | ||||
| 	xorl	%r14d, %esi		# ^W[n & 15] | ||||
| 	roll	%esi			# | ||||
| 	movl	%esi, %r14d		# store to W[n & 15] | ||||
| 	xorl	%r11d, %r14d	# W[n & 15] ^= W[(n+13) & 15] | ||||
| 	xorl	-32+4*6(%rsp), %r14d	# ^W[(n+8) & 15] | ||||
| 	xorl	-32+4*0(%rsp), %r14d	# ^W[(n+2) & 15] | ||||
| 	roll	%r14d		# | ||||
| 	movl	%eax, %edi		# c | ||||
| 	xorl	%ebx, %edi		# ^d | ||||
| 	xorl	%ebp, %edi		# ^b | ||||
| 	leal	-0x359d3e2a(%rcx,%rsi), %ecx # e += RCONST + mixed_W | ||||
| 	leal	-0x359D3E2A(%rcx,%r14), %ecx # e += RCONST + W[n & 15] | ||||
| 	addl	%edi, %ecx		# e += (c ^ d ^ b) | ||||
| 	movl	%edx, %esi		# | ||||
| 	roll	$5, %esi		# rotl32(a,5) | ||||
| 	addl	%esi, %ecx		# e += rotl32(a,5) | ||||
| 	rorl	$2, %ebp		# b = rotl32(b,30) | ||||
| # 63 | ||||
| 	movl	%r12d, %esi	# W[(n+13) & 15] | ||||
| 	xorl	-32+4*7(%rsp), %esi		# ^W[(n+8) & 15] | ||||
| 	xorl	-32+4*1(%rsp), %esi		# ^W[(n+2) & 15] | ||||
| 	xorl	%r15d, %esi		# ^W[n & 15] | ||||
| 	roll	%esi			# | ||||
| 	movl	%esi, %r15d		# store to W[n & 15] | ||||
| 	xorl	%r12d, %r15d	# W[n & 15] ^= W[(n+13) & 15] | ||||
| 	xorl	-32+4*7(%rsp), %r15d	# ^W[(n+8) & 15] | ||||
| 	xorl	-32+4*1(%rsp), %r15d	# ^W[(n+2) & 15] | ||||
| 	roll	%r15d		# | ||||
| 	movl	%ebp, %edi		# c | ||||
| 	xorl	%eax, %edi		# ^d | ||||
| 	xorl	%edx, %edi		# ^b | ||||
| 	leal	-0x359d3e2a(%rbx,%rsi), %ebx # e += RCONST + mixed_W | ||||
| 	leal	-0x359D3E2A(%rbx,%r15), %ebx # e += RCONST + W[n & 15] | ||||
| 	addl	%edi, %ebx		# e += (c ^ d ^ b) | ||||
| 	movl	%ecx, %esi		# | ||||
| 	roll	$5, %esi		# rotl32(a,5) | ||||
| @@ -1084,7 +1040,7 @@ sha1_process_block64: | ||||
| 	movl	%edx, %edi		# c | ||||
| 	xorl	%ebp, %edi		# ^d | ||||
| 	xorl	%ecx, %edi		# ^b | ||||
| 	leal	-0x359d3e2a(%rax,%rsi), %eax # e += RCONST + mixed_W | ||||
| 	leal	-0x359D3E2A(%rax,%rsi), %eax # e += RCONST + W[n & 15] | ||||
| 	addl	%edi, %eax		# e += (c ^ d ^ b) | ||||
| 	movl	%ebx, %esi		# | ||||
| 	roll	$5, %esi		# rotl32(a,5) | ||||
| @@ -1100,7 +1056,7 @@ sha1_process_block64: | ||||
| 	movl	%ecx, %edi		# c | ||||
| 	xorl	%edx, %edi		# ^d | ||||
| 	xorl	%ebx, %edi		# ^b | ||||
| 	leal	-0x359d3e2a(%rbp,%rsi), %ebp # e += RCONST + mixed_W | ||||
| 	leal	-0x359D3E2A(%rbp,%rsi), %ebp # e += RCONST + W[n & 15] | ||||
| 	addl	%edi, %ebp		# e += (c ^ d ^ b) | ||||
| 	movl	%eax, %esi		# | ||||
| 	roll	$5, %esi		# rotl32(a,5) | ||||
| @@ -1116,7 +1072,7 @@ sha1_process_block64: | ||||
| 	movl	%ebx, %edi		# c | ||||
| 	xorl	%ecx, %edi		# ^d | ||||
| 	xorl	%eax, %edi		# ^b | ||||
| 	leal	-0x359d3e2a(%rdx,%rsi), %edx # e += RCONST + mixed_W | ||||
| 	leal	-0x359D3E2A(%rdx,%rsi), %edx # e += RCONST + W[n & 15] | ||||
| 	addl	%edi, %edx		# e += (c ^ d ^ b) | ||||
| 	movl	%ebp, %esi		# | ||||
| 	roll	$5, %esi		# rotl32(a,5) | ||||
| @@ -1132,7 +1088,7 @@ sha1_process_block64: | ||||
| 	movl	%eax, %edi		# c | ||||
| 	xorl	%ebx, %edi		# ^d | ||||
| 	xorl	%ebp, %edi		# ^b | ||||
| 	leal	-0x359d3e2a(%rcx,%rsi), %ecx # e += RCONST + mixed_W | ||||
| 	leal	-0x359D3E2A(%rcx,%rsi), %ecx # e += RCONST + W[n & 15] | ||||
| 	addl	%edi, %ecx		# e += (c ^ d ^ b) | ||||
| 	movl	%edx, %esi		# | ||||
| 	roll	$5, %esi		# rotl32(a,5) | ||||
| @@ -1148,7 +1104,7 @@ sha1_process_block64: | ||||
| 	movl	%ebp, %edi		# c | ||||
| 	xorl	%eax, %edi		# ^d | ||||
| 	xorl	%edx, %edi		# ^b | ||||
| 	leal	-0x359d3e2a(%rbx,%rsi), %ebx # e += RCONST + mixed_W | ||||
| 	leal	-0x359D3E2A(%rbx,%rsi), %ebx # e += RCONST + W[n & 15] | ||||
| 	addl	%edi, %ebx		# e += (c ^ d ^ b) | ||||
| 	movl	%ecx, %esi		# | ||||
| 	roll	$5, %esi		# rotl32(a,5) | ||||
| @@ -1164,7 +1120,7 @@ sha1_process_block64: | ||||
| 	movl	%edx, %edi		# c | ||||
| 	xorl	%ebp, %edi		# ^d | ||||
| 	xorl	%ecx, %edi		# ^b | ||||
| 	leal	-0x359d3e2a(%rax,%rsi), %eax # e += RCONST + mixed_W | ||||
| 	leal	-0x359D3E2A(%rax,%rsi), %eax # e += RCONST + W[n & 15] | ||||
| 	addl	%edi, %eax		# e += (c ^ d ^ b) | ||||
| 	movl	%ebx, %esi		# | ||||
| 	roll	$5, %esi		# rotl32(a,5) | ||||
| @@ -1180,7 +1136,7 @@ sha1_process_block64: | ||||
| 	movl	%ecx, %edi		# c | ||||
| 	xorl	%edx, %edi		# ^d | ||||
| 	xorl	%ebx, %edi		# ^b | ||||
| 	leal	-0x359d3e2a(%rbp,%rsi), %ebp # e += RCONST + mixed_W | ||||
| 	leal	-0x359D3E2A(%rbp,%rsi), %ebp # e += RCONST + W[n & 15] | ||||
| 	addl	%edi, %ebp		# e += (c ^ d ^ b) | ||||
| 	movl	%eax, %esi		# | ||||
| 	roll	$5, %esi		# rotl32(a,5) | ||||
| @@ -1196,135 +1152,119 @@ sha1_process_block64: | ||||
| 	movl	%ebx, %edi		# c | ||||
| 	xorl	%ecx, %edi		# ^d | ||||
| 	xorl	%eax, %edi		# ^b | ||||
| 	leal	-0x359d3e2a(%rdx,%rsi), %edx # e += RCONST + mixed_W | ||||
| 	leal	-0x359D3E2A(%rdx,%rsi), %edx # e += RCONST + W[n & 15] | ||||
| 	addl	%edi, %edx		# e += (c ^ d ^ b) | ||||
| 	movl	%ebp, %esi		# | ||||
| 	roll	$5, %esi		# rotl32(a,5) | ||||
| 	addl	%esi, %edx		# e += rotl32(a,5) | ||||
| 	rorl	$2, %eax		# b = rotl32(b,30) | ||||
| # 72 | ||||
| 	movl	-32+4*5(%rsp), %esi	# W[(n+13) & 15] | ||||
| 	xorl	-32+4*0(%rsp), %esi		# ^W[(n+8) & 15] | ||||
| 	xorl	%r10d, %esi		# ^W[(n+2) & 15] | ||||
| 	xorl	%r8d, %esi		# ^W[n & 15] | ||||
| 	roll	%esi			# | ||||
| 	movl	%esi, %r8d		# store to W[n & 15] | ||||
| 	xorl	-32+4*5(%rsp), %r8d	# W[n & 15] ^= W[(n+13) & 15] | ||||
| 	xorl	-32+4*0(%rsp), %r8d	# ^W[(n+8) & 15] | ||||
| 	xorl	%r10d, %r8d	# ^W[(n+2) & 15] | ||||
| 	roll	%r8d		# | ||||
| 	movl	%eax, %edi		# c | ||||
| 	xorl	%ebx, %edi		# ^d | ||||
| 	xorl	%ebp, %edi		# ^b | ||||
| 	leal	-0x359d3e2a(%rcx,%rsi), %ecx # e += RCONST + mixed_W | ||||
| 	leal	-0x359D3E2A(%rcx,%r8), %ecx # e += RCONST + W[n & 15] | ||||
| 	addl	%edi, %ecx		# e += (c ^ d ^ b) | ||||
| 	movl	%edx, %esi		# | ||||
| 	roll	$5, %esi		# rotl32(a,5) | ||||
| 	addl	%esi, %ecx		# e += rotl32(a,5) | ||||
| 	rorl	$2, %ebp		# b = rotl32(b,30) | ||||
| # 73 | ||||
| 	movl	-32+4*6(%rsp), %esi	# W[(n+13) & 15] | ||||
| 	xorl	-32+4*1(%rsp), %esi		# ^W[(n+8) & 15] | ||||
| 	xorl	%r11d, %esi		# ^W[(n+2) & 15] | ||||
| 	xorl	%r9d, %esi		# ^W[n & 15] | ||||
| 	roll	%esi			# | ||||
| 	movl	%esi, %r9d		# store to W[n & 15] | ||||
| 	xorl	-32+4*6(%rsp), %r9d	# W[n & 15] ^= W[(n+13) & 15] | ||||
| 	xorl	-32+4*1(%rsp), %r9d	# ^W[(n+8) & 15] | ||||
| 	xorl	%r11d, %r9d	# ^W[(n+2) & 15] | ||||
| 	roll	%r9d		# | ||||
| 	movl	%ebp, %edi		# c | ||||
| 	xorl	%eax, %edi		# ^d | ||||
| 	xorl	%edx, %edi		# ^b | ||||
| 	leal	-0x359d3e2a(%rbx,%rsi), %ebx # e += RCONST + mixed_W | ||||
| 	leal	-0x359D3E2A(%rbx,%r9), %ebx # e += RCONST + W[n & 15] | ||||
| 	addl	%edi, %ebx		# e += (c ^ d ^ b) | ||||
| 	movl	%ecx, %esi		# | ||||
| 	roll	$5, %esi		# rotl32(a,5) | ||||
| 	addl	%esi, %ebx		# e += rotl32(a,5) | ||||
| 	rorl	$2, %edx		# b = rotl32(b,30) | ||||
| # 74 | ||||
| 	movl	-32+4*7(%rsp), %esi	# W[(n+13) & 15] | ||||
| 	xorl	-32+4*2(%rsp), %esi		# ^W[(n+8) & 15] | ||||
| 	xorl	%r12d, %esi		# ^W[(n+2) & 15] | ||||
| 	xorl	%r10d, %esi		# ^W[n & 15] | ||||
| 	roll	%esi			# | ||||
| 	movl	%esi, %r10d		# store to W[n & 15] | ||||
| 	xorl	-32+4*7(%rsp), %r10d	# W[n & 15] ^= W[(n+13) & 15] | ||||
| 	xorl	-32+4*2(%rsp), %r10d	# ^W[(n+8) & 15] | ||||
| 	xorl	%r12d, %r10d	# ^W[(n+2) & 15] | ||||
| 	roll	%r10d		# | ||||
| 	movl	%edx, %edi		# c | ||||
| 	xorl	%ebp, %edi		# ^d | ||||
| 	xorl	%ecx, %edi		# ^b | ||||
| 	leal	-0x359d3e2a(%rax,%rsi), %eax # e += RCONST + mixed_W | ||||
| 	leal	-0x359D3E2A(%rax,%r10), %eax # e += RCONST + W[n & 15] | ||||
| 	addl	%edi, %eax		# e += (c ^ d ^ b) | ||||
| 	movl	%ebx, %esi		# | ||||
| 	roll	$5, %esi		# rotl32(a,5) | ||||
| 	addl	%esi, %eax		# e += rotl32(a,5) | ||||
| 	rorl	$2, %ecx		# b = rotl32(b,30) | ||||
| # 75 | ||||
| 	movl	%r8d, %esi	# W[(n+13) & 15] | ||||
| 	xorl	-32+4*3(%rsp), %esi		# ^W[(n+8) & 15] | ||||
| 	xorl	%r13d, %esi		# ^W[(n+2) & 15] | ||||
| 	xorl	%r11d, %esi		# ^W[n & 15] | ||||
| 	roll	%esi			# | ||||
| 	movl	%esi, %r11d		# store to W[n & 15] | ||||
| 	xorl	%r8d, %r11d	# W[n & 15] ^= W[(n+13) & 15] | ||||
| 	xorl	-32+4*3(%rsp), %r11d	# ^W[(n+8) & 15] | ||||
| 	xorl	%r13d, %r11d	# ^W[(n+2) & 15] | ||||
| 	roll	%r11d		# | ||||
| 	movl	%ecx, %edi		# c | ||||
| 	xorl	%edx, %edi		# ^d | ||||
| 	xorl	%ebx, %edi		# ^b | ||||
| 	leal	-0x359d3e2a(%rbp,%rsi), %ebp # e += RCONST + mixed_W | ||||
| 	leal	-0x359D3E2A(%rbp,%r11), %ebp # e += RCONST + W[n & 15] | ||||
| 	addl	%edi, %ebp		# e += (c ^ d ^ b) | ||||
| 	movl	%eax, %esi		# | ||||
| 	roll	$5, %esi		# rotl32(a,5) | ||||
| 	addl	%esi, %ebp		# e += rotl32(a,5) | ||||
| 	rorl	$2, %ebx		# b = rotl32(b,30) | ||||
| # 76 | ||||
| 	movl	%r9d, %esi	# W[(n+13) & 15] | ||||
| 	xorl	-32+4*4(%rsp), %esi		# ^W[(n+8) & 15] | ||||
| 	xorl	%r14d, %esi		# ^W[(n+2) & 15] | ||||
| 	xorl	%r12d, %esi		# ^W[n & 15] | ||||
| 	roll	%esi			# | ||||
| 	movl	%esi, %r12d		# store to W[n & 15] | ||||
| 	xorl	%r9d, %r12d	# W[n & 15] ^= W[(n+13) & 15] | ||||
| 	xorl	-32+4*4(%rsp), %r12d	# ^W[(n+8) & 15] | ||||
| 	xorl	%r14d, %r12d	# ^W[(n+2) & 15] | ||||
| 	roll	%r12d		# | ||||
| 	movl	%ebx, %edi		# c | ||||
| 	xorl	%ecx, %edi		# ^d | ||||
| 	xorl	%eax, %edi		# ^b | ||||
| 	leal	-0x359d3e2a(%rdx,%rsi), %edx # e += RCONST + mixed_W | ||||
| 	leal	-0x359D3E2A(%rdx,%r12), %edx # e += RCONST + W[n & 15] | ||||
| 	addl	%edi, %edx		# e += (c ^ d ^ b) | ||||
| 	movl	%ebp, %esi		# | ||||
| 	roll	$5, %esi		# rotl32(a,5) | ||||
| 	addl	%esi, %edx		# e += rotl32(a,5) | ||||
| 	rorl	$2, %eax		# b = rotl32(b,30) | ||||
| # 77 | ||||
| 	movl	%r10d, %esi	# W[(n+13) & 15] | ||||
| 	xorl	-32+4*5(%rsp), %esi		# ^W[(n+8) & 15] | ||||
| 	xorl	%r15d, %esi		# ^W[(n+2) & 15] | ||||
| 	xorl	%r13d, %esi		# ^W[n & 15] | ||||
| 	roll	%esi			# | ||||
| 	# store to W[n & 15] - unused, not done | ||||
| 	xorl	%r10d, %r13d	# W[n & 15] ^= W[(n+13) & 15] | ||||
| 	xorl	-32+4*5(%rsp), %r13d	# ^W[(n+8) & 15] | ||||
| 	xorl	%r15d, %r13d	# ^W[(n+2) & 15] | ||||
| 	roll	%r13d		# | ||||
| 	movl	%eax, %edi		# c | ||||
| 	xorl	%ebx, %edi		# ^d | ||||
| 	xorl	%ebp, %edi		# ^b | ||||
| 	leal	-0x359d3e2a(%rcx,%rsi), %ecx # e += RCONST + mixed_W | ||||
| 	leal	-0x359D3E2A(%rcx,%r13), %ecx # e += RCONST + W[n & 15] | ||||
| 	addl	%edi, %ecx		# e += (c ^ d ^ b) | ||||
| 	movl	%edx, %esi		# | ||||
| 	roll	$5, %esi		# rotl32(a,5) | ||||
| 	addl	%esi, %ecx		# e += rotl32(a,5) | ||||
| 	rorl	$2, %ebp		# b = rotl32(b,30) | ||||
| # 78 | ||||
| 	movl	%r11d, %esi	# W[(n+13) & 15] | ||||
| 	xorl	-32+4*6(%rsp), %esi		# ^W[(n+8) & 15] | ||||
| 	xorl	-32+4*0(%rsp), %esi		# ^W[(n+2) & 15] | ||||
| 	xorl	%r14d, %esi		# ^W[n & 15] | ||||
| 	roll	%esi			# | ||||
| 	# store to W[n & 15] - unused, not done | ||||
| 	xorl	%r11d, %r14d	# W[n & 15] ^= W[(n+13) & 15] | ||||
| 	xorl	-32+4*6(%rsp), %r14d	# ^W[(n+8) & 15] | ||||
| 	xorl	-32+4*0(%rsp), %r14d	# ^W[(n+2) & 15] | ||||
| 	roll	%r14d		# | ||||
| 	movl	%ebp, %edi		# c | ||||
| 	xorl	%eax, %edi		# ^d | ||||
| 	xorl	%edx, %edi		# ^b | ||||
| 	leal	-0x359d3e2a(%rbx,%rsi), %ebx # e += RCONST + mixed_W | ||||
| 	leal	-0x359D3E2A(%rbx,%r14), %ebx # e += RCONST + W[n & 15] | ||||
| 	addl	%edi, %ebx		# e += (c ^ d ^ b) | ||||
| 	movl	%ecx, %esi		# | ||||
| 	roll	$5, %esi		# rotl32(a,5) | ||||
| 	addl	%esi, %ebx		# e += rotl32(a,5) | ||||
| 	rorl	$2, %edx		# b = rotl32(b,30) | ||||
| # 79 | ||||
| 	movl	%r12d, %esi	# W[(n+13) & 15] | ||||
| 	xorl	-32+4*7(%rsp), %esi		# ^W[(n+8) & 15] | ||||
| 	xorl	-32+4*1(%rsp), %esi		# ^W[(n+2) & 15] | ||||
| 	xorl	%r15d, %esi		# ^W[n & 15] | ||||
| 	roll	%esi			# | ||||
| 	# store to W[n & 15] - unused, not done | ||||
| 	xorl	%r12d, %r15d	# W[n & 15] ^= W[(n+13) & 15] | ||||
| 	xorl	-32+4*7(%rsp), %r15d	# ^W[(n+8) & 15] | ||||
| 	xorl	-32+4*1(%rsp), %r15d	# ^W[(n+2) & 15] | ||||
| 	roll	%r15d		# | ||||
| 	movl	%edx, %edi		# c | ||||
| 	xorl	%ebp, %edi		# ^d | ||||
| 	xorl	%ecx, %edi		# ^b | ||||
| 	leal	-0x359d3e2a(%rax,%rsi), %eax # e += RCONST + mixed_W | ||||
| 	leal	-0x359D3E2A(%rax,%r15), %eax # e += RCONST + W[n & 15] | ||||
| 	addl	%edi, %eax		# e += (c ^ d ^ b) | ||||
| 	movl	%ebx, %esi		# | ||||
| 	roll	$5, %esi		# rotl32(a,5) | ||||
|   | ||||
libbb/hash_md5_sha_x86-64.S.sh (new executable file, 267 lines added)
							| @@ -0,0 +1,267 @@ | ||||
| #!/bin/sh | ||||
|  | ||||
| # We don't regenerate it on every "make" invocation - only by hand. | ||||
| # The reason is that the changes to generated code are difficult | ||||
| # to visualize by looking only at this script, it helps when the commit | ||||
| # also contains the diff of the generated file. | ||||
| exec >hash_md5_sha_x86-64.S | ||||
|  | ||||
| echo \ | ||||
| '### Generated by hash_md5_sha_x86-64.S.sh ### | ||||
|  | ||||
| #if CONFIG_SHA1_SMALL == 0 && defined(__GNUC__) && defined(__x86_64__) | ||||
| 	.section	.text.sha1_process_block64,"ax",@progbits | ||||
| 	.globl  sha1_process_block64 | ||||
| 	.hidden sha1_process_block64 | ||||
| 	.type	sha1_process_block64, @function | ||||
|  | ||||
| 	.balign	8	# allow decoders to fetch at least 4 first insns | ||||
| sha1_process_block64: | ||||
| 	pushq	%r15	# | ||||
| 	pushq	%r14	# | ||||
| 	pushq	%r13	# | ||||
| 	pushq	%r12	# | ||||
| 	pushq	%rbp	# | ||||
| 	pushq	%rbx	# | ||||
| 	pushq	%rdi	# we need ctx at the end | ||||
|  | ||||
| #Register and stack use: | ||||
| # eax..edx: a..d | ||||
| # ebp: e | ||||
| # esi,edi: temps | ||||
| # -32+4*n(%rsp),r8...r15: W[0..7,8..15] | ||||
| # (TODO: actually W[0..7] are used a bit more often, put _thme_ into r8..r15?) | ||||
|  | ||||
| 	movq	4*8(%rdi), %r8 | ||||
| 	bswapq	%r8 | ||||
| 	movl	%r8d, %r9d | ||||
| 	shrq	$32, %r8 | ||||
| 	movq	4*10(%rdi), %r10 | ||||
| 	bswapq	%r10 | ||||
| 	movl	%r10d, %r11d | ||||
| 	shrq	$32, %r10 | ||||
| 	movq	4*12(%rdi), %r12 | ||||
| 	bswapq	%r12 | ||||
| 	movl	%r12d, %r13d | ||||
| 	shrq	$32, %r12 | ||||
| 	movq	4*14(%rdi), %r14 | ||||
| 	bswapq	%r14 | ||||
| 	movl	%r14d, %r15d | ||||
| 	shrq	$32, %r14 | ||||
|  | ||||
| 	movl	$3, %eax | ||||
| 1: | ||||
| 	movq	(%rdi,%rax,8), %rsi | ||||
| 	bswapq	%rsi | ||||
| 	rolq	$32, %rsi | ||||
| 	movq	%rsi, -32(%rsp,%rax,8) | ||||
| 	decl	%eax | ||||
| 	jns	1b | ||||
| 	movl	80(%rdi), %eax		# a = ctx->hash[0] | ||||
| 	movl	84(%rdi), %ebx		# b = ctx->hash[1] | ||||
| 	movl	88(%rdi), %ecx		# c = ctx->hash[2] | ||||
| 	movl	92(%rdi), %edx		# d = ctx->hash[3] | ||||
| 	movl	96(%rdi), %ebp		# e = ctx->hash[4] | ||||
| ' | ||||
| W32() { | ||||
| test "$1" || exit 1 | ||||
| test "$1" -lt 0 && exit 1 | ||||
| test "$1" -gt 15 && exit 1 | ||||
| test "$1" -lt 8 && echo "-32+4*$1(%rsp)" | ||||
| test "$1" -ge 8 && echo "%r${1}d" | ||||
| } | ||||
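W32 is the index-to-location map behind the size savings above: W[0..7] stay in stack slots at -32+4*n(%rsp), while W[8..15] live directly in %r8d..%r15d, which is what lets the RD1B/RD2/RD3 helpers below skip the %esi round-trip for high indices. A purely illustrative shell check (not part of the script), reusing the definition as written:

	# Copy of W32 from the script above, evaluated by hand:
	W32() {
	test "$1" || exit 1
	test "$1" -lt 0 && exit 1
	test "$1" -gt 15 && exit 1
	test "$1" -lt 8 && echo "-32+4*$1(%rsp)"
	test "$1" -ge 8 && echo "%r${1}d"
	}
	W32 3     # prints -32+4*3(%rsp)  (stack slot)
	W32 10    # prints %r10d          (register)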
|  | ||||
| RD1A() { | ||||
| local a=$1;local b=$2;local c=$3;local d=$4;local e=$5 | ||||
| local n=$(($6)) | ||||
| echo "# $n" | ||||
| test $n = 0 && echo " | ||||
| 	# W[0], already in %esi | ||||
| ";test $n != 0 && test $n -lt 8 && echo " | ||||
| 	movl	`W32 $n`, %esi		# W[n] | ||||
| ";test $n -ge 8 && echo " | ||||
| 	# W[n], in %r$n | ||||
| ";echo " | ||||
| 	movl	%e$c, %edi		# c | ||||
| 	xorl	%e$d, %edi		# ^d | ||||
| 	andl	%e$b, %edi		# &b | ||||
| 	xorl	%e$d, %edi		# (((c ^ d) & b) ^ d) | ||||
| ";test $n -lt 8 && echo " | ||||
| 	leal	$RCONST(%r$e,%rsi),%e$e # e += RCONST + W[n] | ||||
| ";test $n -ge 8 && echo " | ||||
| 	leal	$RCONST(%r$e,%r$n),%e$e # e += RCONST + W[n] | ||||
| ";echo " | ||||
| 	addl	%edi, %e$e		# e += (((c ^ d) & b) ^ d) | ||||
| 	movl	%e$a, %esi		# | ||||
| 	roll	\$5, %esi		# rotl32(a,5) | ||||
| 	addl	%esi, %e$e		# e += rotl32(a,5) | ||||
| 	rorl	\$2, %e$b		# b = rotl32(b,30) | ||||
| " | ||||
| } | ||||
| RD1B() { | ||||
| local a=$1;local b=$2;local c=$3;local d=$4;local e=$5 | ||||
| local n=$(($6)) | ||||
| local n13=$(((n+13) & 15)) | ||||
| local n8=$(((n+8) & 15)) | ||||
| local n2=$(((n+2) & 15)) | ||||
| local n0=$(((n+0) & 15)) | ||||
| echo " | ||||
| # $n | ||||
| ";test $n0 -lt 8 && echo " | ||||
| 	movl	`W32 $n13`, %esi	# W[(n+13) & 15] | ||||
| 	xorl	`W32 $n8`, %esi		# ^W[(n+8) & 15] | ||||
| 	xorl	`W32 $n2`, %esi		# ^W[(n+2) & 15] | ||||
| 	xorl	`W32 $n0`, %esi		# ^W[n & 15] | ||||
| 	roll	%esi			# | ||||
| 	movl	%esi, `W32 $n0`		# store to W[n & 15] | ||||
| ";test $n0 -ge 8 && echo " | ||||
| 	xorl	`W32 $n13`, `W32 $n0`	# W[n & 15] ^= W[(n+13) & 15] | ||||
| 	xorl	`W32 $n8`, `W32 $n0`	# ^W[(n+8) & 15] | ||||
| 	xorl	`W32 $n2`, `W32 $n0`	# ^W[(n+2) & 15] | ||||
| 	roll	`W32 $n0`		# | ||||
| "; echo " | ||||
| 	movl	%e$c, %edi		# c | ||||
| 	xorl	%e$d, %edi		# ^d | ||||
| 	andl	%e$b, %edi		# &b | ||||
| 	xorl	%e$d, %edi		# (((c ^ d) & b) ^ d) | ||||
| ";test $n0 -lt 8 && echo " | ||||
| 	leal	$RCONST(%r$e,%rsi), %e$e # e += RCONST + W[n & 15] | ||||
| ";test $n0 -ge 8 && echo " | ||||
| 	leal	$RCONST(%r$e,%r$n0), %e$e # e += RCONST + W[n & 15] | ||||
| ";echo " | ||||
| 	addl	%edi, %e$e		# e += (((c ^ d) & b) ^ d) | ||||
| 	movl	%e$a, %esi		# | ||||
| 	roll	\$5, %esi		# rotl32(a,5) | ||||
| 	addl	%esi, %e$e		# e += rotl32(a,5) | ||||
| 	rorl	\$2, %e$b		# b = rotl32(b,30) | ||||
| " | ||||
| } | ||||
| { | ||||
| RCONST=0x5A827999 | ||||
| RD1A ax bx cx dx bp  0; RD1A bp ax bx cx dx  1; RD1A dx bp ax bx cx  2; RD1A cx dx bp ax bx  3; RD1A bx cx dx bp ax  4 | ||||
| RD1A ax bx cx dx bp  5; RD1A bp ax bx cx dx  6; RD1A dx bp ax bx cx  7; RD1A cx dx bp ax bx  8; RD1A bx cx dx bp ax  9 | ||||
| RD1A ax bx cx dx bp 10; RD1A bp ax bx cx dx 11; RD1A dx bp ax bx cx 12; RD1A cx dx bp ax bx 13; RD1A bx cx dx bp ax 14 | ||||
| RD1A ax bx cx dx bp 15; RD1B bp ax bx cx dx 16; RD1B dx bp ax bx cx 17; RD1B cx dx bp ax bx 18; RD1B bx cx dx bp ax 19 | ||||
| } | grep -v '^$' | ||||
|  | ||||
| RD2() { | ||||
| local a=$1;local b=$2;local c=$3;local d=$4;local e=$5 | ||||
| local n=$(($6)) | ||||
| local n13=$(((n+13) & 15)) | ||||
| local n8=$(((n+8) & 15)) | ||||
| local n2=$(((n+2) & 15)) | ||||
| local n0=$(((n+0) & 15)) | ||||
| echo " | ||||
| # $n | ||||
| ";test $n0 -lt 8 && echo " | ||||
| 	movl	`W32 $n13`, %esi	# W[(n+13) & 15] | ||||
| 	xorl	`W32 $n8`, %esi		# ^W[(n+8) & 15] | ||||
| 	xorl	`W32 $n2`, %esi		# ^W[(n+2) & 15] | ||||
| 	xorl	`W32 $n0`, %esi		# ^W[n & 15] | ||||
| 	roll	%esi			# | ||||
| 	movl	%esi, `W32 $n0`		# store to W[n & 15] | ||||
| ";test $n0 -ge 8 && echo " | ||||
| 	xorl	`W32 $n13`, `W32 $n0`	# W[n & 15] ^= W[(n+13) & 15] | ||||
| 	xorl	`W32 $n8`, `W32 $n0`	# ^W[(n+8) & 15] | ||||
| 	xorl	`W32 $n2`, `W32 $n0`	# ^W[(n+2) & 15] | ||||
| 	roll	`W32 $n0`		# | ||||
| "; echo " | ||||
| 	movl	%e$c, %edi		# c | ||||
| 	xorl	%e$d, %edi		# ^d | ||||
| 	xorl	%e$b, %edi		# ^b | ||||
| ";test $n0 -lt 8 && echo " | ||||
| 	leal	$RCONST(%r$e,%rsi), %e$e # e += RCONST + W[n & 15] | ||||
| ";test $n0 -ge 8 && echo " | ||||
| 	leal	$RCONST(%r$e,%r$n0), %e$e # e += RCONST + W[n & 15] | ||||
| ";echo " | ||||
| 	addl	%edi, %e$e		# e += (c ^ d ^ b) | ||||
| 	movl	%e$a, %esi		# | ||||
| 	roll	\$5, %esi		# rotl32(a,5) | ||||
| 	addl	%esi, %e$e		# e += rotl32(a,5) | ||||
| 	rorl	\$2, %e$b		# b = rotl32(b,30) | ||||
| " | ||||
| } | ||||
| { | ||||
| RCONST=0x6ED9EBA1 | ||||
| RD2 ax bx cx dx bp 20; RD2 bp ax bx cx dx 21; RD2 dx bp ax bx cx 22; RD2 cx dx bp ax bx 23; RD2 bx cx dx bp ax 24 | ||||
| RD2 ax bx cx dx bp 25; RD2 bp ax bx cx dx 26; RD2 dx bp ax bx cx 27; RD2 cx dx bp ax bx 28; RD2 bx cx dx bp ax 29 | ||||
| RD2 ax bx cx dx bp 30; RD2 bp ax bx cx dx 31; RD2 dx bp ax bx cx 32; RD2 cx dx bp ax bx 33; RD2 bx cx dx bp ax 34 | ||||
| RD2 ax bx cx dx bp 35; RD2 bp ax bx cx dx 36; RD2 dx bp ax bx cx 37; RD2 cx dx bp ax bx 38; RD2 bx cx dx bp ax 39 | ||||
| } | grep -v '^$' | ||||
|  | ||||
| RD3() { | ||||
| local a=$1;local b=$2;local c=$3;local d=$4;local e=$5 | ||||
| local n=$(($6)) | ||||
| local n13=$(((n+13) & 15)) | ||||
| local n8=$(((n+8) & 15)) | ||||
| local n2=$(((n+2) & 15)) | ||||
| local n0=$(((n+0) & 15)) | ||||
| echo " | ||||
| # $n | ||||
| 	movl	%e$b, %edi		# di: b | ||||
| 	movl	%e$b, %esi		# si: b | ||||
| 	orl	%e$c, %edi		# di: b | c | ||||
| 	andl	%e$c, %esi		# si: b & c | ||||
| 	andl	%e$d, %edi		# di: (b | c) & d | ||||
| 	orl	%esi, %edi		# ((b | c) & d) | (b & c) | ||||
| ";test $n0 -lt 8 && echo " | ||||
| 	movl	`W32 $n13`, %esi	# W[(n+13) & 15] | ||||
| 	xorl	`W32 $n8`, %esi		# ^W[(n+8) & 15] | ||||
| 	xorl	`W32 $n2`, %esi		# ^W[(n+2) & 15] | ||||
| 	xorl	`W32 $n0`, %esi		# ^W[n & 15] | ||||
| 	roll	%esi			# | ||||
| 	movl	%esi, `W32 $n0`		# store to W[n & 15] | ||||
| ";test $n0 -ge 8 && echo " | ||||
| 	xorl	`W32 $n13`, `W32 $n0`	# W[n & 15] ^= W[(n+13) & 15] | ||||
| 	xorl	`W32 $n8`, `W32 $n0`	# ^W[(n+8) & 15] | ||||
| 	xorl	`W32 $n2`, `W32 $n0`	# ^W[(n+2) & 15] | ||||
| 	roll	`W32 $n0`		# | ||||
| "; echo " | ||||
| 	addl	%edi, %e$e		# += ((b | c) & d) | (b & c) | ||||
| ";test $n0 -lt 8 && echo " | ||||
| 	leal	$RCONST(%r$e,%rsi), %e$e # e += RCONST + W[n & 15] | ||||
| ";test $n0 -ge 8 && echo " | ||||
| 	leal	$RCONST(%r$e,%r$n0), %e$e # e += RCONST + W[n & 15] | ||||
| ";echo " | ||||
| 	movl	%e$a, %esi		# | ||||
| 	roll	\$5, %esi		# rotl32(a,5) | ||||
| 	addl	%esi, %e$e		# e += rotl32(a,5) | ||||
| 	rorl	\$2, %e$b		# b = rotl32(b,30) | ||||
| " | ||||
| } | ||||
| { | ||||
| #RCONST=0x8F1BBCDC "out of range for signed 32bit displacement" | ||||
| RCONST=-0x70E44324 | ||||
| RD3 ax bx cx dx bp 40; RD3 bp ax bx cx dx 41; RD3 dx bp ax bx cx 42; RD3 cx dx bp ax bx 43; RD3 bx cx dx bp ax 44 | ||||
| RD3 ax bx cx dx bp 45; RD3 bp ax bx cx dx 46; RD3 dx bp ax bx cx 47; RD3 cx dx bp ax bx 48; RD3 bx cx dx bp ax 49 | ||||
| RD3 ax bx cx dx bp 50; RD3 bp ax bx cx dx 51; RD3 dx bp ax bx cx 52; RD3 cx dx bp ax bx 53; RD3 bx cx dx bp ax 54 | ||||
| RD3 ax bx cx dx bp 55; RD3 bp ax bx cx dx 56; RD3 dx bp ax bx cx 57; RD3 cx dx bp ax bx 58; RD3 bx cx dx bp ax 59 | ||||
| } | grep -v '^$' | ||||
|  | ||||
| # Round 4 has the same logic as round 2, only n and RCONST are different | ||||
| { | ||||
| #RCONST=0xCA62C1D6 "out of range for signed 32bit displacement" | ||||
| RCONST=-0x359D3E2A | ||||
| RD2 ax bx cx dx bp 60; RD2 bp ax bx cx dx 61; RD2 dx bp ax bx cx 62; RD2 cx dx bp ax bx 63; RD2 bx cx dx bp ax 64 | ||||
| RD2 ax bx cx dx bp 65; RD2 bp ax bx cx dx 66; RD2 dx bp ax bx cx 67; RD2 cx dx bp ax bx 68; RD2 bx cx dx bp ax 69 | ||||
| RD2 ax bx cx dx bp 70; RD2 bp ax bx cx dx 71; RD2 dx bp ax bx cx 72; RD2 cx dx bp ax bx 73; RD2 bx cx dx bp ax 74 | ||||
| RD2 ax bx cx dx bp 75; RD2 bp ax bx cx dx 76; RD2 dx bp ax bx cx 77; RD2 cx dx bp ax bx 78; RD2 bx cx dx bp ax 79 | ||||
| } | grep -v '^$' | ||||
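The two negative RCONST values are the round constants reduced modulo 2^32: 0x8F1BBCDC and 0xCA62C1D6 do not fit the signed 32-bit displacement field of leal, but since leal truncates its result to 32 bits, adding (constant - 2^32) is equivalent. A quick shell check (illustrative only, not emitted by the generator):

	# 0x8F1BBCDC - 2^32 = -0x70E44324; 0xCA62C1D6 - 2^32 = -0x359D3E2A
	printf '0x%X\n' $(( 0x100000000 - 0x8F1BBCDC ))   # 0x70E44324
	printf '0x%X\n' $(( 0x100000000 - 0xCA62C1D6 ))   # 0x359D3E2A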
|  | ||||
| echo " | ||||
| 	popq	%rdi		# | ||||
| 	addl	%eax, 80(%rdi)  # ctx->hash[0] += a | ||||
| 	addl	%ebx, 84(%rdi)  # ctx->hash[1] += b | ||||
| 	addl	%ecx, 88(%rdi)  # ctx->hash[2] += c | ||||
| 	addl	%edx, 92(%rdi)  # ctx->hash[3] += d | ||||
| 	addl	%ebp, 96(%rdi)  # ctx->hash[4] += e | ||||
| 	popq	%rbx		# | ||||
| 	popq	%rbp		# | ||||
| 	popq	%r12		# | ||||
| 	popq	%r13		# | ||||
| 	popq	%r14		# | ||||
| 	popq	%r15		# | ||||
|  | ||||
| 	ret | ||||
| 	.size	sha1_process_block64, .-sha1_process_block64 | ||||
| #endif" | ||||
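Per the header comment, the generated hash_md5_sha_x86-64.S is meant to be refreshed by hand rather than by make. A plausible manual workflow (the working directory and review step are assumptions, not spelled out in the commit):

	# The script redirects its own stdout into hash_md5_sha_x86-64.S,
	# so it is run from busybox's libbb/ directory:
	cd libbb
	sh hash_md5_sha_x86-64.S.sh
	git diff hash_md5_sha_x86-64.S    # review the regenerated assembly alongside this script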