libbb/sha1: x86_64 version: move to a separate .S file, no code changes
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
This commit is contained in:
		| @@ -56,6 +56,7 @@ lib-y += login.o | ||||
| lib-y += make_directory.o | ||||
| lib-y += makedev.o | ||||
| lib-y += hash_md5_sha.o | ||||
| lib-y += hash_md5_sha_x86-64.o | ||||
| # Alternative (disabled) MD5 implementation | ||||
| #lib-y += hash_md5prime.o | ||||
| lib-y += messages.o | ||||
|   | ||||
| @@ -696,397 +696,11 @@ static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx UNUSED_PARAM) | ||||
| #undef RCONST | ||||
| } | ||||
| # elif defined(__GNUC__) && defined(__x86_64__) | ||||
| static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx UNUSED_PARAM) | ||||
| { | ||||
| 	BUILD_BUG_ON(offsetof(sha1_ctx_t, hash) != 80); | ||||
| 	asm( | ||||
| "\n\ | ||||
| 	pushq	%r15		#                                   \n\ | ||||
| 	pushq	%r14		#                                   \n\ | ||||
| 	pushq	%r13		#                                   \n\ | ||||
| 	pushq	%r12		#                                   \n\ | ||||
| 	pushq	%rbp		#                                   \n\ | ||||
| 	pushq	%rbx		#                                   \n\ | ||||
| 	pushq	%rdi		# we need ctx at the end            \n\ | ||||
|                                                                     \n\ | ||||
| #Register and stack use:                                            \n\ | ||||
| # eax..edx: a..d                                                    \n\ | ||||
| # ebp: e                                                            \n\ | ||||
| # esi,edi: temps                                                    \n\ | ||||
| # -32+4*n(%rsp),r8...r15: W[0..7,8..15]                             \n\ | ||||
| 	.macro	loadW n,r                                           \n\ | ||||
| 	.if \\n == 0                                                \n\ | ||||
| 	movl	-32+4*0(%rsp),\\r                                   \n\ | ||||
| 	.endif                                                      \n\ | ||||
| 	.if \\n == 1                                                \n\ | ||||
| 	movl	-32+4*1(%rsp),\\r                                   \n\ | ||||
| 	.endif                                                      \n\ | ||||
| 	.if \\n == 2                                                \n\ | ||||
| 	movl	-32+4*2(%rsp),\\r                                   \n\ | ||||
| 	.endif                                                      \n\ | ||||
| 	.if \\n == 3                                                \n\ | ||||
| 	movl	-32+4*3(%rsp),\\r                                   \n\ | ||||
| 	.endif                                                      \n\ | ||||
| 	.if \\n == 4                                                \n\ | ||||
| 	movl	-32+4*4(%rsp),\\r                                   \n\ | ||||
| 	.endif                                                      \n\ | ||||
| 	.if \\n == 5                                                \n\ | ||||
| 	movl	-32+4*5(%rsp),\\r                                   \n\ | ||||
| 	.endif                                                      \n\ | ||||
| 	.if \\n == 6                                                \n\ | ||||
| 	movl	-32+4*6(%rsp),\\r                                   \n\ | ||||
| 	.endif                                                      \n\ | ||||
| 	.if \\n == 7                                                \n\ | ||||
| 	movl	-32+4*7(%rsp),\\r                                   \n\ | ||||
| 	.endif                                                      \n\ | ||||
| 	.if \\n == 8                                                \n\ | ||||
| 	movl	%r8d,\\r                                            \n\ | ||||
| 	.endif                                                      \n\ | ||||
| 	.if \\n == 9                                                \n\ | ||||
| 	movl	%r9d,\\r                                            \n\ | ||||
| 	.endif                                                      \n\ | ||||
| 	.if \\n == 10                                               \n\ | ||||
| 	movl	%r10d,\\r                                           \n\ | ||||
| 	.endif                                                      \n\ | ||||
| 	.if \\n == 11                                               \n\ | ||||
| 	movl	%r11d,\\r                                           \n\ | ||||
| 	.endif                                                      \n\ | ||||
| 	.if \\n == 12                                               \n\ | ||||
| 	movl	%r12d,\\r                                           \n\ | ||||
| 	.endif                                                      \n\ | ||||
| 	.if \\n == 13                                               \n\ | ||||
| 	movl	%r13d,\\r                                           \n\ | ||||
| 	.endif                                                      \n\ | ||||
| 	.if \\n == 14                                               \n\ | ||||
| 	movl	%r14d,\\r                                           \n\ | ||||
| 	.endif                                                      \n\ | ||||
| 	.if \\n == 15                                               \n\ | ||||
| 	movl	%r15d,\\r                                           \n\ | ||||
| 	.endif                                                      \n\ | ||||
| 	.endm                                                       \n\ | ||||
|                                                                     \n\ | ||||
| 	.macro	storeW r,n                                          \n\ | ||||
| 	.if \\n == 0                                                \n\ | ||||
| 	movl	\\r,-32+4*0(%rsp)                                   \n\ | ||||
| 	.endif                                                      \n\ | ||||
| 	.if \\n == 1                                                \n\ | ||||
| 	movl	\\r,-32+4*1(%rsp)                                   \n\ | ||||
| 	.endif                                                      \n\ | ||||
| 	.if \\n == 2                                                \n\ | ||||
| 	movl	\\r,-32+4*2(%rsp)                                   \n\ | ||||
| 	.endif                                                      \n\ | ||||
| 	.if \\n == 3                                                \n\ | ||||
| 	movl	\\r,-32+4*3(%rsp)                                   \n\ | ||||
| 	.endif                                                      \n\ | ||||
| 	.if \\n == 4                                                \n\ | ||||
| 	movl	\\r,-32+4*4(%rsp)                                   \n\ | ||||
| 	.endif                                                      \n\ | ||||
| 	.if \\n == 5                                                \n\ | ||||
| 	movl	\\r,-32+4*5(%rsp)                                   \n\ | ||||
| 	.endif                                                      \n\ | ||||
| 	.if \\n == 6                                                \n\ | ||||
| 	movl	\\r,-32+4*6(%rsp)                                   \n\ | ||||
| 	.endif                                                      \n\ | ||||
| 	.if \\n == 7                                                \n\ | ||||
| 	movl	\\r,-32+4*7(%rsp)                                   \n\ | ||||
| 	.endif                                                      \n\ | ||||
| 	.if \\n == 8                                                \n\ | ||||
| 	movl	\\r,%r8d                                            \n\ | ||||
| 	.endif                                                      \n\ | ||||
| 	.if \\n == 9                                                \n\ | ||||
| 	movl	\\r,%r9d                                            \n\ | ||||
| 	.endif                                                      \n\ | ||||
| 	.if \\n == 10                                               \n\ | ||||
| 	movl	\\r,%r10d                                           \n\ | ||||
| 	.endif                                                      \n\ | ||||
| 	.if \\n == 11                                               \n\ | ||||
| 	movl	\\r,%r11d                                           \n\ | ||||
| 	.endif                                                      \n\ | ||||
| 	.if \\n == 12                                               \n\ | ||||
| 	movl	\\r,%r12d                                           \n\ | ||||
| 	.endif                                                      \n\ | ||||
| 	.if \\n == 13                                               \n\ | ||||
| 	movl	\\r,%r13d                                           \n\ | ||||
| 	.endif                                                      \n\ | ||||
| 	.if \\n == 14                                               \n\ | ||||
| 	movl	\\r,%r14d                                           \n\ | ||||
| 	.endif                                                      \n\ | ||||
| 	.if \\n == 15                                               \n\ | ||||
| 	movl	\\r,%r15d                                           \n\ | ||||
| 	.endif                                                      \n\ | ||||
| 	.endm                                                       \n\ | ||||
|                                                                     \n\ | ||||
| 	.macro	xorW n,r                                            \n\ | ||||
| 	.if \\n == 0                                                \n\ | ||||
| 	xorl	-32+4*0(%rsp),\\r                                   \n\ | ||||
| 	.endif                                                      \n\ | ||||
| 	.if \\n == 1                                                \n\ | ||||
| 	xorl	-32+4*1(%rsp),\\r                                   \n\ | ||||
| 	.endif                                                      \n\ | ||||
| 	.if \\n == 2                                                \n\ | ||||
| 	xorl	-32+4*2(%rsp),\\r                                   \n\ | ||||
| 	.endif                                                      \n\ | ||||
| 	.if \\n == 3                                                \n\ | ||||
| 	xorl	-32+4*3(%rsp),\\r                                   \n\ | ||||
| 	.endif                                                      \n\ | ||||
| 	.if \\n == 4                                                \n\ | ||||
| 	xorl	-32+4*4(%rsp),\\r                                   \n\ | ||||
| 	.endif                                                      \n\ | ||||
| 	.if \\n == 5                                                \n\ | ||||
| 	xorl	-32+4*5(%rsp),\\r                                   \n\ | ||||
| 	.endif                                                      \n\ | ||||
| 	.if \\n == 6                                                \n\ | ||||
| 	xorl	-32+4*6(%rsp),\\r                                   \n\ | ||||
| 	.endif                                                      \n\ | ||||
| 	.if \\n == 7                                                \n\ | ||||
| 	xorl	-32+4*7(%rsp),\\r                                   \n\ | ||||
| 	.endif                                                      \n\ | ||||
| 	.if \\n == 8                                                \n\ | ||||
| 	xorl	%r8d,\\r                                            \n\ | ||||
| 	.endif                                                      \n\ | ||||
| 	.if \\n == 9                                                \n\ | ||||
| 	xorl	%r9d,\\r                                            \n\ | ||||
| 	.endif                                                      \n\ | ||||
| 	.if \\n == 10                                               \n\ | ||||
| 	xorl	%r10d,\\r                                           \n\ | ||||
| 	.endif                                                      \n\ | ||||
| 	.if \\n == 11                                               \n\ | ||||
| 	xorl	%r11d,\\r                                           \n\ | ||||
| 	.endif                                                      \n\ | ||||
| 	.if \\n == 12                                               \n\ | ||||
| 	xorl	%r12d,\\r                                           \n\ | ||||
| 	.endif                                                      \n\ | ||||
| 	.if \\n == 13                                               \n\ | ||||
| 	xorl	%r13d,\\r                                           \n\ | ||||
| 	.endif                                                      \n\ | ||||
| 	.if \\n == 14                                               \n\ | ||||
| 	xorl	%r14d,\\r                                           \n\ | ||||
| 	.endif                                                      \n\ | ||||
| 	.if \\n == 15                                               \n\ | ||||
| 	xorl	%r15d,\\r                                           \n\ | ||||
| 	.endif                                                      \n\ | ||||
| 	.endm                                                       \n\ | ||||
|                                                                     \n\ | ||||
| 	movq	4*8(%rdi), %r8                                      \n\ | ||||
| 	bswap	%r8                                                 \n\ | ||||
| 	movl	%r8d, %r9d                                          \n\ | ||||
| 	shrq	$32, %r8                                            \n\ | ||||
| 	movq	4*10(%rdi), %r10                                    \n\ | ||||
| 	bswap	%r10                                                \n\ | ||||
| 	movl	%r10d, %r11d                                        \n\ | ||||
| 	shrq	$32, %r10                                           \n\ | ||||
| 	movq	4*12(%rdi), %r12                                    \n\ | ||||
| 	bswap	%r12                                                \n\ | ||||
| 	movl	%r12d, %r13d                                        \n\ | ||||
| 	shrq	$32, %r12                                           \n\ | ||||
| 	movq	4*14(%rdi), %r14                                    \n\ | ||||
| 	bswap	%r14                                                \n\ | ||||
| 	movl	%r14d, %r15d                                        \n\ | ||||
| 	shrq	$32, %r14                                           \n\ | ||||
|                                                                     \n\ | ||||
| 	movl	$3, %eax                                            \n\ | ||||
| 1:                                                                  \n\ | ||||
| 	movq	(%rdi,%rax,8), %rsi                                 \n\ | ||||
| 	bswap	%rsi                                                \n\ | ||||
| 	rolq	$32, %rsi                                           \n\ | ||||
| 	movq	%rsi, -32(%rsp,%rax,8)                              \n\ | ||||
| 	decl	%eax                                                \n\ | ||||
| 	jns	1b                                                  \n\ | ||||
| 	movl	80(%rdi), %eax	# a = ctx->hash[0]                  \n\ | ||||
| 	movl	84(%rdi), %ebx	# b = ctx->hash[1]                  \n\ | ||||
| 	movl	88(%rdi), %ecx	# c = ctx->hash[2]                  \n\ | ||||
| 	movl	92(%rdi), %edx	# d = ctx->hash[3]                  \n\ | ||||
| 	movl	96(%rdi), %ebp	# e = ctx->hash[4]                  \n\ | ||||
| " | ||||
| #define RD1As(a,b,c,d,e, n, RCONST) \ | ||||
| "\n\ | ||||
| 	##loadW	"n", %esi		# n=0, W[0] already in %esi \n\ | ||||
| 	movl	%e"c", %edi		# c                         \n\ | ||||
| 	xorl	%e"d", %edi		# ^d                        \n\ | ||||
| 	andl	%e"b", %edi		# &b                        \n\ | ||||
| 	xorl	%e"d", %edi		# (((c ^ d) & b) ^ d)       \n\ | ||||
| 	leal	"RCONST"(%r"e",%rsi), %e"e" # e += RCONST + W[n]    \n\ | ||||
| 	addl	%edi, %e"e"		# e += (((c ^ d) & b) ^ d)  \n\ | ||||
| 	movl	%e"a", %esi		#                           \n\ | ||||
| 	roll	$5, %esi		# rotl32(a,5)               \n\ | ||||
| 	addl	%esi, %e"e"		# e += rotl32(a,5)          \n\ | ||||
| 	rorl	$2, %e"b"		# b = rotl32(b,30)          \n\ | ||||
| " | ||||
| #define RD1Bs(a,b,c,d,e, n, RCONST) \ | ||||
| "\n\ | ||||
| 	loadW	"n", %esi		# W[n]                      \n\ | ||||
| 	movl	%e"c", %edi		# c                         \n\ | ||||
| 	xorl	%e"d", %edi		# ^d                        \n\ | ||||
| 	andl	%e"b", %edi		# &b                        \n\ | ||||
| 	xorl	%e"d", %edi		# (((c ^ d) & b) ^ d)       \n\ | ||||
| 	leal	"RCONST"(%r"e",%rsi), %e"e" # e += RCONST + W[n]    \n\ | ||||
| 	addl	%edi, %e"e"		# e += (((c ^ d) & b) ^ d)  \n\ | ||||
| 	movl	%e"a", %esi		#                           \n\ | ||||
| 	roll	$5, %esi		# rotl32(a,5)               \n\ | ||||
| 	addl	%esi, %e"e"		# e += rotl32(a,5)          \n\ | ||||
| 	rorl	$2, %e"b"		# b = rotl32(b,30)          \n\ | ||||
| " | ||||
| #define RD1Cs(a,b,c,d,e, n, RCONST) \ | ||||
| "\n\ | ||||
| 	movl	%e"c", %edi		# c                         \n\ | ||||
| 	xorl	%e"d", %edi		# ^d                        \n\ | ||||
| 	andl	%e"b", %edi		# &b                        \n\ | ||||
| 	xorl	%e"d", %edi		# (((c ^ d) & b) ^ d)       \n\ | ||||
| 	leal	"RCONST"(%r"e",%r"n"), %e"e" # e += RCONST + W[n]   \n\ | ||||
| 	addl	%edi, %e"e"		# e += (((c ^ d) & b) ^ d)  \n\ | ||||
| 	movl	%e"a", %esi		#                           \n\ | ||||
| 	roll	$5, %esi		# rotl32(a,5)               \n\ | ||||
| 	addl	%esi, %e"e"		# e += rotl32(a,5)          \n\ | ||||
| 	rorl	$2, %e"b"		# b = rotl32(b,30)          \n\ | ||||
| " | ||||
| #define RD1Ds(a,b,c,d,e, n13,n8,n2,n, RCONST) \ | ||||
| "\n\ | ||||
| 	loadW	"n13", %esi		# W[(n+13) & 15]            \n\ | ||||
| 	xorW	"n8", %esi		# ^W[(n+8) & 15]            \n\ | ||||
| 	xorW	"n2", %esi		# ^W[(n+2) & 15]            \n\ | ||||
| 	xorW	"n", %esi		# ^W[n & 15]                \n\ | ||||
| 	roll	%esi			#                           \n\ | ||||
| 	storeW	%esi, "n"		# store to W[n & 15]        \n\ | ||||
| 	movl	%e"c", %edi		# c                         \n\ | ||||
| 	xorl	%e"d", %edi		# ^d                        \n\ | ||||
| 	andl	%e"b", %edi		# &b                        \n\ | ||||
| 	xorl	%e"d", %edi		# (((c ^ d) & b) ^ d)       \n\ | ||||
| 	leal	"RCONST"(%r"e",%rsi), %e"e" # e += RCONST + mixed_W \n\ | ||||
| 	addl	%edi, %e"e"		# e += (((c ^ d) & b) ^ d)  \n\ | ||||
| 	movl	%e"a", %esi		#                           \n\ | ||||
| 	roll	$5, %esi		# rotl32(a,5)               \n\ | ||||
| 	addl	%esi, %e"e"		# e += rotl32(a,5)          \n\ | ||||
| 	rorl	$2, %e"b"		# b = rotl32(b,30)          \n\ | ||||
| " | ||||
| #define RD1A(a,b,c,d,e, n) RD1As(STR(a),STR(b),STR(c),STR(d),STR(e), STR(n), STR(RCONST)) | ||||
| #define RD1B(a,b,c,d,e, n) RD1Bs(STR(a),STR(b),STR(c),STR(d),STR(e), STR(n), STR(RCONST)) | ||||
| #define RD1C(a,b,c,d,e, n) RD1Cs(STR(a),STR(b),STR(c),STR(d),STR(e), STR(n), STR(RCONST)) | ||||
| #define RD1D(a,b,c,d,e, n) RD1Ds(STR(a),STR(b),STR(c),STR(d),STR(e), STR(((n+13)&15)), STR(((n+8)&15)), STR(((n+2)&15)), STR(((n)&15)), STR(RCONST)) | ||||
| #undef  RCONST | ||||
| #define RCONST 0x5A827999 | ||||
| 	RD1A(ax,bx,cx,dx,bp, 0) RD1B(bp,ax,bx,cx,dx, 1) RD1B(dx,bp,ax,bx,cx, 2) RD1B(cx,dx,bp,ax,bx, 3) RD1B(bx,cx,dx,bp,ax, 4) | ||||
| 	RD1B(ax,bx,cx,dx,bp, 5) RD1B(bp,ax,bx,cx,dx, 6) RD1B(dx,bp,ax,bx,cx, 7) RD1C(cx,dx,bp,ax,bx, 8) RD1C(bx,cx,dx,bp,ax, 9) | ||||
| 	RD1C(ax,bx,cx,dx,bp,10) RD1C(bp,ax,bx,cx,dx,11) RD1C(dx,bp,ax,bx,cx,12) RD1C(cx,dx,bp,ax,bx,13) RD1C(bx,cx,dx,bp,ax,14) | ||||
| 	RD1C(ax,bx,cx,dx,bp,15) RD1D(bp,ax,bx,cx,dx,16) RD1D(dx,bp,ax,bx,cx,17) RD1D(cx,dx,bp,ax,bx,18) RD1D(bx,cx,dx,bp,ax,19) | ||||
| #define RD2s(a,b,c,d,e, n13,n8,n2,n, RCONST) \ | ||||
| "\n\ | ||||
| 	loadW	"n13", %esi		# W[(n+13) & 15]            \n\ | ||||
| 	xorW	"n8", %esi		# ^W[(n+8) & 15]            \n\ | ||||
| 	xorW	"n2", %esi		# ^W[(n+2) & 15]            \n\ | ||||
| 	xorW	"n", %esi		# ^W[n & 15]                \n\ | ||||
| 	roll	%esi			#                           \n\ | ||||
| 	storeW	%esi, "n"		# store to W[n & 15]        \n\ | ||||
| 	movl	%e"c", %edi		# c                         \n\ | ||||
| 	xorl	%e"d", %edi		# ^d                        \n\ | ||||
| 	xorl	%e"b", %edi		# ^b                        \n\ | ||||
| 	leal	"RCONST"(%r"e",%rsi), %e"e" # e += RCONST + mixed_W \n\ | ||||
| 	addl	%edi, %e"e"		# e += (c ^ d ^ b)          \n\ | ||||
| 	movl	%e"a", %esi		#                           \n\ | ||||
| 	roll	$5, %esi		# rotl32(a,5)               \n\ | ||||
| 	addl	%esi, %e"e"		# e += rotl32(a,5)          \n\ | ||||
| 	rorl	$2, %e"b"		# b = rotl32(b,30)          \n\ | ||||
| " | ||||
| #define RD2(a,b,c,d,e, n) RD2s(STR(a),STR(b),STR(c),STR(d),STR(e), STR(((20+n+13)&15)), STR(((20+n+8)&15)), STR(((20+n+2)&15)), STR(((20+n)&15)), STR(RCONST)) | ||||
| #undef  RCONST | ||||
| #define RCONST 0x6ED9EBA1 | ||||
| 	RD2(ax,bx,cx,dx,bp, 0) RD2(bp,ax,bx,cx,dx, 1) RD2(dx,bp,ax,bx,cx, 2) RD2(cx,dx,bp,ax,bx, 3) RD2(bx,cx,dx,bp,ax, 4) | ||||
| 	RD2(ax,bx,cx,dx,bp, 5) RD2(bp,ax,bx,cx,dx, 6) RD2(dx,bp,ax,bx,cx, 7) RD2(cx,dx,bp,ax,bx, 8) RD2(bx,cx,dx,bp,ax, 9) | ||||
| 	RD2(ax,bx,cx,dx,bp,10) RD2(bp,ax,bx,cx,dx,11) RD2(dx,bp,ax,bx,cx,12) RD2(cx,dx,bp,ax,bx,13) RD2(bx,cx,dx,bp,ax,14) | ||||
| 	RD2(ax,bx,cx,dx,bp,15) RD2(bp,ax,bx,cx,dx,16) RD2(dx,bp,ax,bx,cx,17) RD2(cx,dx,bp,ax,bx,18) RD2(bx,cx,dx,bp,ax,19) | ||||
|  | ||||
| #define RD3s(a,b,c,d,e, n13,n8,n2,n, RCONST) \ | ||||
| "\n\ | ||||
| 	movl	%e"b", %edi		# di: b                     \n\ | ||||
| 	movl	%e"b", %esi		# si: b                     \n\ | ||||
| 	orl	%e"c", %edi		# di: b | c                 \n\ | ||||
| 	andl	%e"c", %esi		# si: b & c                 \n\ | ||||
| 	andl	%e"d", %edi		# di: (b | c) & d           \n\ | ||||
| 	orl	%esi, %edi		# ((b | c) & d) | (b & c)   \n\ | ||||
| 	loadW	"n13", %esi		# W[(n+13) & 15]            \n\ | ||||
| 	xorW	"n8", %esi		# ^W[(n+8) & 15]            \n\ | ||||
| 	xorW	"n2", %esi		# ^W[(n+2) & 15]            \n\ | ||||
| 	xorW	"n", %esi		# ^W[n & 15]                \n\ | ||||
| 	roll	%esi			#                           \n\ | ||||
| 	storeW	%esi, "n"		# store to W[n & 15]        \n\ | ||||
| 	addl	%edi, %e"e"		# += ((b | c) & d) | (b & c)\n\ | ||||
| 	leal	"RCONST"(%r"e",%rsi), %e"e" # e += RCONST + mixed_W \n\ | ||||
| 	movl	%e"a", %esi		#                           \n\ | ||||
| 	roll	$5, %esi		# rotl32(a,5)               \n\ | ||||
| 	addl	%esi, %e"e"		# e += rotl32(a,5)          \n\ | ||||
| 	rorl	$2, %e"b"		# b = rotl32(b,30)          \n\ | ||||
| " | ||||
| #define RD3(a,b,c,d,e, n) RD3s(STR(a),STR(b),STR(c),STR(d),STR(e), STR(((40+n+13)&15)), STR(((40+n+8)&15)), STR(((40+n+2)&15)), STR(((40+n)&15)), STR(RCONST)) | ||||
| #undef  RCONST | ||||
| //#define RCONST 0x8F1BBCDC "out of range for signed 32bit displacement" | ||||
| #define RCONST  -0x70e44324 | ||||
| 	RD3(ax,bx,cx,dx,bp, 0) RD3(bp,ax,bx,cx,dx, 1) RD3(dx,bp,ax,bx,cx, 2) RD3(cx,dx,bp,ax,bx, 3) RD3(bx,cx,dx,bp,ax, 4) | ||||
| 	RD3(ax,bx,cx,dx,bp, 5) RD3(bp,ax,bx,cx,dx, 6) RD3(dx,bp,ax,bx,cx, 7) RD3(cx,dx,bp,ax,bx, 8) RD3(bx,cx,dx,bp,ax, 9) | ||||
| 	RD3(ax,bx,cx,dx,bp,10) RD3(bp,ax,bx,cx,dx,11) RD3(dx,bp,ax,bx,cx,12) RD3(cx,dx,bp,ax,bx,13) RD3(bx,cx,dx,bp,ax,14) | ||||
| 	RD3(ax,bx,cx,dx,bp,15) RD3(bp,ax,bx,cx,dx,16) RD3(dx,bp,ax,bx,cx,17) RD3(cx,dx,bp,ax,bx,18) RD3(bx,cx,dx,bp,ax,19) | ||||
| /* in hash_md5_sha_x86-64.S */ | ||||
| struct ASM_expects_80 { char t[1 - 2*(offsetof(sha1_ctx_t, hash) != 80)]; }; | ||||
| void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx UNUSED_PARAM); | ||||
|  | ||||
| #define RD4As(a,b,c,d,e, n13,n8,n2,n, RCONST) \ | ||||
| "\n\ | ||||
| 	loadW	"n13", %esi		# W[(n+13) & 15]            \n\ | ||||
| 	xorW	"n8", %esi		# ^W[(n+8) & 15]            \n\ | ||||
| 	xorW	"n2", %esi		# ^W[(n+2) & 15]            \n\ | ||||
| 	xorW	"n", %esi		# ^W[n & 15]                \n\ | ||||
| 	roll	%esi			#                           \n\ | ||||
| 	storeW	%esi, "n"		# store to W[n & 15]        \n\ | ||||
| 	movl	%e"c", %edi		# c                         \n\ | ||||
| 	xorl	%e"d", %edi		# ^d                        \n\ | ||||
| 	xorl	%e"b", %edi		# ^b                        \n\ | ||||
| 	leal	"RCONST"(%r"e",%rsi), %e"e" # e += RCONST + mixed_W \n\ | ||||
| 	addl	%edi, %e"e"		# e += (c ^ d ^ b)          \n\ | ||||
| 	movl	%e"a", %esi		#                           \n\ | ||||
| 	roll	$5, %esi		# rotl32(a,5)               \n\ | ||||
| 	addl	%esi, %e"e"		# e += rotl32(a,5)          \n\ | ||||
| 	rorl	$2, %e"b"		# b = rotl32(b,30)          \n\ | ||||
| " | ||||
| #define RD4Bs(a,b,c,d,e, n13,n8,n2,n, RCONST) \ | ||||
| "\n\ | ||||
| 	loadW	"n13", %esi		# W[(n+13) & 15]            \n\ | ||||
| 	xorW	"n8", %esi		# ^W[(n+8) & 15]            \n\ | ||||
| 	xorW	"n2", %esi		# ^W[(n+2) & 15]            \n\ | ||||
| 	xorW	"n", %esi		# ^W[n & 15]                \n\ | ||||
| 	roll	%esi			#                           \n\ | ||||
| 	#storeW	%esi, "n"		# store to W[n & 15] elided \n\ | ||||
| 	movl	%e"c", %edi		# c                         \n\ | ||||
| 	xorl	%e"d", %edi		# ^d                        \n\ | ||||
| 	xorl	%e"b", %edi		# ^b                        \n\ | ||||
| 	leal	"RCONST"(%r"e",%rsi), %e"e" # e += RCONST + mixed_W \n\ | ||||
| 	addl	%edi, %e"e"		# e += (c ^ d ^ b)          \n\ | ||||
| 	movl	%e"a", %esi		#                           \n\ | ||||
| 	roll	$5, %esi		# rotl32(a,5)               \n\ | ||||
| 	addl	%esi, %e"e"		# e += rotl32(a,5)          \n\ | ||||
| 	rorl	$2, %e"b"		# b = rotl32(b,30)          \n\ | ||||
| " | ||||
| #define RD4A(a,b,c,d,e, n) RD4As(STR(a),STR(b),STR(c),STR(d),STR(e), STR(((60+n+13)&15)), STR(((60+n+8)&15)), STR(((60+n+2)&15)), STR(((60+n)&15)), STR(RCONST)) | ||||
| #define RD4B(a,b,c,d,e, n) RD4Bs(STR(a),STR(b),STR(c),STR(d),STR(e), STR(((60+n+13)&15)), STR(((60+n+8)&15)), STR(((60+n+2)&15)), STR(((60+n)&15)), STR(RCONST)) | ||||
| #undef  RCONST | ||||
| //#define RCONST 0xCA62C1D6 "out of range for signed 32bit displacement" | ||||
| #define RCONST  -0x359d3e2a | ||||
| 	RD4A(ax,bx,cx,dx,bp, 0) RD4A(bp,ax,bx,cx,dx, 1) RD4A(dx,bp,ax,bx,cx, 2) RD4A(cx,dx,bp,ax,bx, 3) RD4A(bx,cx,dx,bp,ax, 4) | ||||
| 	RD4A(ax,bx,cx,dx,bp, 5) RD4A(bp,ax,bx,cx,dx, 6) RD4A(dx,bp,ax,bx,cx, 7) RD4A(cx,dx,bp,ax,bx, 8) RD4A(bx,cx,dx,bp,ax, 9) | ||||
| 	RD4A(ax,bx,cx,dx,bp,10) RD4A(bp,ax,bx,cx,dx,11) RD4A(dx,bp,ax,bx,cx,12) RD4A(cx,dx,bp,ax,bx,13) RD4A(bx,cx,dx,bp,ax,14) | ||||
| 	RD4A(ax,bx,cx,dx,bp,15) RD4A(bp,ax,bx,cx,dx,16) RD4B(dx,bp,ax,bx,cx,17) RD4B(cx,dx,bp,ax,bx,18) RD4B(bx,cx,dx,bp,ax,19) | ||||
|  | ||||
| "\n\ | ||||
| 	popq	%rdi		#                                   \n\ | ||||
| 	addl	%eax, 80(%rdi)  # ctx->hash[0] += a                 \n\ | ||||
| 	addl	%ebx, 84(%rdi)  # ctx->hash[1] += b                 \n\ | ||||
| 	addl	%ecx, 88(%rdi)  # ctx->hash[2] += c                 \n\ | ||||
| 	addl	%edx, 92(%rdi)  # ctx->hash[3] += d                 \n\ | ||||
| 	addl	%ebp, 96(%rdi)  # ctx->hash[4] += e                 \n\ | ||||
| 	popq	%rbx		#                                   \n\ | ||||
| 	popq	%rbp		#                                   \n\ | ||||
| 	popq	%r12		#                                   \n\ | ||||
| 	popq	%r13		#                                   \n\ | ||||
| 	popq	%r14		#                                   \n\ | ||||
| 	popq	%r15		#                                   \n\ | ||||
| " | ||||
| 	); /* asm */ | ||||
| #undef RCONST | ||||
| } | ||||
| # else | ||||
| /* Fast, fully-unrolled SHA1. +3800 bytes of code on x86. | ||||
|  * It seems further speedup can be achieved by handling more than | ||||
|   | ||||
							
								
								
									
										1349
									
								
								libbb/hash_md5_sha_x86-64.S
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1349
									
								
								libbb/hash_md5_sha_x86-64.S
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
		Reference in New Issue
	
	Block a user